add deploy health gate and host security ops scripts

This commit is contained in:
Nico 2026-02-14 18:12:35 -08:00
parent 4d09d7e5b4
commit fa3129bb1a
8 changed files with 179 additions and 3 deletions

View File

@ -42,6 +42,9 @@ jobs:
needs: build
runs-on: ubuntu-latest
steps:
# The health-check step in this job runs scripts/wait-for-health.sh from the
# repository, so the workspace needs a checkout before it.
- name: Checkout repo
uses: actions/checkout@v3
- name: Trigger Dokploy Deploy
env:
DOKPLOY_DEPLOY_HOOK: ${{ secrets.DOKPLOY_DEPLOY_HOOK }}
@ -54,3 +57,15 @@ jobs:
curl -fsS -X POST "$DOKPLOY_DEPLOY_HOOK" \
-H "Content-Type: application/json" \
-d "{\"imageTag\":\"$IMAGE_TAG\"}"
# Post-deploy gate: the step fails unless scripts/wait-for-health.sh exits 0.
- name: Wait for Ready Health Check
env:
# Endpoint to poll, taken from the DOKPLOY_HEALTHCHECK_URL secret.
HEALTH_URL: ${{ secrets.DOKPLOY_HEALTHCHECK_URL }}
# Retry budget read by scripts/wait-for-health.sh (attempt count and the
# pause between attempts, in seconds).
MAX_ATTEMPTS: "30"
SLEEP_SECONDS: "10"
# Fails fast with an explicit message when the secret is empty, then hands
# off to the polling script.
run: |
if [ -z "$HEALTH_URL" ]; then
echo "Missing DOKPLOY_HEALTHCHECK_URL secret"
exit 1
fi
bash scripts/wait-for-health.sh

View File

@ -113,6 +113,16 @@ Primary outcomes:
- Added operational smoke tooling for deploy/rollback validation:
- `scripts/smoke-public-launch.sh` checks health endpoints, `X-Request-Id`, and `request_id` response fields.
- Expanded `docs/public-launch-runbook.md` with deployment smoke and rollback checklist sections.
- Added host hardening and DR drill scripts:
- `scripts/harden-host-ufw.sh` (UFW baseline with dry-run default).
- `scripts/check-host-security.sh` (ports/firewall/fail2ban/docker status snapshot).
- `scripts/restore-drill-postgres.sh` (restore + validation query workflow).
- Updated docs to use executable operational checks in:
- `docs/public-launch-runbook.md`
- `docs/06_SECURITY_REVIEW.md`
- Added deploy health gate automation:
- `scripts/wait-for-health.sh` polls the ready endpoint and verifies that the response body contains a `request_id` field.
- `.gitea/workflows/deploy-dokploy.yml` now runs post-deploy health verification using `DOKPLOY_HEALTHCHECK_URL`.
### Risks / Notes to Revisit
- Workspace is intentionally dirty; commits must be path-scoped to avoid mixing unrelated changes.

View File

@ -66,6 +66,6 @@ This document tracks launch-critical security findings for app, data, users, and
- [x] `npm run lint` passes (warnings acceptable for now).
- [x] `npm test` passes.
- [x] `npm run build` passes.
- [ ] Production host firewall rules verified.
- [ ] Production host firewall rules verified (`scripts/harden-host-ufw.sh` + `scripts/check-host-security.sh`).
- [ ] SSH restricted to VPN/allowlist.
- [ ] Backup restore drill logged for current week.
- [ ] Backup restore drill logged for current week (`scripts/restore-drill-postgres.sh`).

View File

@ -28,6 +28,10 @@
- `REGISTRY_USER`
- `REGISTRY_PASS`
- `DOKPLOY_DEPLOY_HOOK`
- `DOKPLOY_HEALTHCHECK_URL`
- Health gate:
- workflow calls `scripts/wait-for-health.sh` against `DOKPLOY_HEALTHCHECK_URL`
- default retry window: about 5 minutes (30 attempts x 10s pause between attempts, plus the time each request takes)
## 4) Reverse Proxy + Network Hardening
- Use `docker/nginx/fiddy.conf` as baseline.
@ -41,6 +45,10 @@
- Confirm Nginx writes JSON logs:
- `/var/log/nginx/fiddy-access.log`
- `/var/log/nginx/fiddy-error.log`
- Apply/verify host baseline using scripts:
- dry-run firewall apply: `SSH_ALLOW_CIDR=<your-cidr> DRY_RUN=1 scripts/harden-host-ufw.sh`
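- real firewall apply: `sudo SSH_ALLOW_CIDR=<your-cidr> DRY_RUN=0 scripts/harden-host-ufw.sh` (set the variables after `sudo`; variables placed before it are dropped by sudo's default `env_reset`)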
- host status audit: `sudo scripts/check-host-security.sh` (root is needed for `ufw status` and for process names in the `ss -p` output)
## 5) Observability
- Bring up monitoring stack:
@ -66,7 +74,7 @@
- Retention:
- default 7 days (`RETENTION_DAYS=7`)
- Restore drill:
- `scripts/restore-postgres.sh backups/postgres/<file>.dump <target_database_url>`
- `scripts/restore-drill-postgres.sh backups/postgres/<file>.dump <target_database_url>`
- Run restore drill on non-prod DB before public launch.
## 7) Incident Response Quick Flow

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
#
# Prints a status snapshot of the host: listening sockets, UFW state,
# fail2ban state and running Docker containers, followed by a reminder of
# the expected network surface. Missing tools and failing status commands
# are reported/tolerated rather than aborting the run.
set -euo pipefail

#######################################
# Print one report section.
# Arguments:
#   $1 - section title (rendered as "== title ==")
#   $2 - tool that must be on PATH for the section to run
#   $3.. - command line to execute when the tool is available
# Outputs:
#   Section header, the command output (or a "not found" line), and a
#   trailing blank line, all on stdout.
#######################################
report_section() {
  local title="$1"
  local tool="$2"
  shift 2

  printf '== %s ==\n' "$title"
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf '%s command not found\n' "$tool"
  else
    # Best effort: a failing status command must not stop the snapshot.
    "$@" || true
  fi
  printf '\n'
}

printf '== Host Security Check ==\n'
printf 'Timestamp (UTC): %s\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"

report_section "Listening TCP/UDP Ports" ss \
  ss -tulpn
report_section "UFW Status" ufw \
  ufw status verbose
report_section "Fail2ban Status" fail2ban-client \
  fail2ban-client status
report_section "Docker Containers" docker \
  docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}'

printf '%s\n' \
  "== Quick Expected Surface ==" \
  "- Public inbound expected: 80/tcp, 443/tcp only" \
  "- SSH expected: restricted source allowlist or VPN-only" \
  "- Postgres expected: private-only, not internet-exposed"

View File

@ -0,0 +1,35 @@
#!/usr/bin/env bash
#
# Applies a baseline UFW policy: reset, deny incoming, allow outgoing,
# allow SSH (22/tcp) only from SSH_ALLOW_CIDR, allow 80/tcp and 443/tcp,
# then enable the firewall and print its status.
#
# Environment:
#   SSH_ALLOW_CIDR  Required. Source address/CIDR allowed to reach 22/tcp.
#   DRY_RUN         1 (default): only print the ufw commands.
#                   0: execute them.
#                   Any other value is rejected, so a typo such as
#                   DRY_RUN=true can never trigger a real firewall reset.
#
# Exit status: 0 on success, 1 on a failed precondition; a failing ufw
# command aborts the script with that command's status (set -e).
set -euo pipefail

if ! command -v ufw >/dev/null 2>&1; then
  echo "ufw is not installed on this host." >&2
  exit 1
fi

SSH_ALLOW_CIDR="${SSH_ALLOW_CIDR:-}"
DRY_RUN="${DRY_RUN:-1}"

if [[ -z "$SSH_ALLOW_CIDR" ]]; then
  echo "SSH_ALLOW_CIDR is required (example: SSH_ALLOW_CIDR=203.0.113.10/32)." >&2
  exit 1
fi

# Only the two documented values are accepted. Previously every value other
# than "1" fell through to the real (destructive) code path.
case "$DRY_RUN" in
  0 | 1) ;;
  *)
    echo "DRY_RUN must be 0 or 1 (got: $DRY_RUN); refusing to touch the firewall." >&2
    exit 1
    ;;
esac

#######################################
# Run a command, or just print it when DRY_RUN=1.
# Globals:   DRY_RUN (read)
# Arguments: the command line to run
#######################################
run_cmd() {
  if [[ "$DRY_RUN" == "1" ]]; then
    echo "+ $*"
  else
    "$@"
  fi
}

echo "Applying UFW baseline policy (DRY_RUN=$DRY_RUN)..."

# NOTE(review): the reset runs before the SSH rule is added, so a value of
# SSH_ALLOW_CIDR that ufw rejects aborts the script with the firewall already
# reset. Verify the value with a dry run first.
run_cmd ufw --force reset
run_cmd ufw default deny incoming
run_cmd ufw default allow outgoing
run_cmd ufw allow from "$SSH_ALLOW_CIDR" to any port 22 proto tcp
run_cmd ufw allow 80/tcp
run_cmd ufw allow 443/tcp
run_cmd ufw --force enable
run_cmd ufw status verbose

echo "Done."

View File

@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Restore drill: restores a Postgres dump into a target database through
# restore-postgres.sh, then runs a few validation queries against it.
#
# Usage: restore-drill-postgres.sh <backup.dump> <target_database_url>
#
# NOTE: the database URL is passed to child processes as a command-line
# argument, so any credentials embedded in it are visible in the process list.
#
# Exit status: 0 when the restore and all validation queries succeed,
# non-zero otherwise.
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <backup.dump> <target_database_url>" >&2
  exit 1
fi

BACKUP_FILE="$1"
TARGET_DATABASE_URL="$2"

if [[ ! -f "$BACKUP_FILE" ]]; then
  echo "Backup file not found: $BACKUP_FILE" >&2
  exit 1
fi

if ! command -v psql >/dev/null 2>&1; then
  echo "psql command not found." >&2
  exit 1
fi

# Prefer the restore helper that sits next to this script so the drill works
# from any working directory; keep the original cwd-relative path as a
# fallback for setups that relied on it.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)"
restore_script="${script_dir}/restore-postgres.sh"
if [[ ! -f "$restore_script" ]]; then
  restore_script="scripts/restore-postgres.sh"
fi
if [[ ! -f "$restore_script" ]]; then
  echo "Restore script not found: ${script_dir}/restore-postgres.sh or scripts/restore-postgres.sh" >&2
  exit 1
fi

echo "Running restore drill..."
bash "$restore_script" "$BACKUP_FILE" "$TARGET_DATABASE_URL"

echo "Running post-restore validation queries..."
# ON_ERROR_STOP makes psql exit non-zero on the first failing statement, so a
# missing table fails the drill instead of being skipped.
psql "$TARGET_DATABASE_URL" -v ON_ERROR_STOP=1 <<'SQL'
select now() as restore_checked_at_utc;
select count(*) as public_tables
from information_schema.tables
where table_schema='public';
select count(*) as users_count from users;
SQL

echo "Restore drill completed successfully."

View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
#
# Polls a health endpoint until it answers HTTP 200 with a body containing
# "request_id", or until the attempt budget is exhausted.
#
# Usage:
#   HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh
#   scripts/wait-for-health.sh https://your-domain/api/health/ready
#
# Environment:
#   HEALTH_URL      Endpoint to poll (overridden by the first argument).
#   MAX_ATTEMPTS    Number of attempts, positive integer (default 30).
#   SLEEP_SECONDS   Pause between attempts, passed to sleep (default 10).
#   CURL_MAX_TIME   Per-request time limit passed to curl --max-time
#                   (default 10), so a hung connection cannot stall the gate.
#
# Exit status: 0 once the check passes, 1 on bad input or when every attempt
# failed.
set -euo pipefail

HEALTH_URL="${1:-${HEALTH_URL:-}}"
MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"
SLEEP_SECONDS="${SLEEP_SECONDS:-10}"
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"

if [[ -z "$HEALTH_URL" ]]; then
  echo "Usage: HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh" >&2
  echo "   or: scripts/wait-for-health.sh https://your-domain/api/health/ready" >&2
  exit 1
fi

# Reject values that would otherwise produce a silent zero-iteration loop
# (non-numeric, zero) or be read as octal (leading zero).
if ! [[ "$MAX_ATTEMPTS" =~ ^[1-9][0-9]*$ ]]; then
  echo "MAX_ATTEMPTS must be a positive integer (got: $MAX_ATTEMPTS)." >&2
  exit 1
fi

tmp_body="$(mktemp)"
trap 'rm -f "$tmp_body"' EXIT

status=""
for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
  # Start from an empty body so a failed request never leaves the previous
  # attempt's response behind.
  : >"$tmp_body"

  # "|| true": a curl failure is an expected outcome while the service is
  # still starting; it shows up as a non-200 status instead of aborting.
  status="$(curl -sS --max-time "$CURL_MAX_TIME" -o "$tmp_body" -w "%{http_code}" "$HEALTH_URL" || true)"

  if [[ "$status" == "200" ]] && grep -q '"request_id"' "$tmp_body"; then
    echo "Health check passed on attempt $attempt/$MAX_ATTEMPTS"
    exit 0
  fi

  if ((attempt < MAX_ATTEMPTS)); then
    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status), retrying in ${SLEEP_SECONDS}s..."
    sleep "$SLEEP_SECONDS"
  else
    # No point sleeping after the final attempt.
    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status)."
  fi
done

echo "Health check failed after $MAX_ATTEMPTS attempts." >&2
echo "Last response body:" >&2
cat "$tmp_body" >&2 || true
exit 1