add deploy health gate and host security ops scripts
This commit is contained in:
parent
4d09d7e5b4
commit
fa3129bb1a
@ -42,6 +42,9 @@ jobs:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Trigger Dokploy Deploy
|
||||
env:
|
||||
DOKPLOY_DEPLOY_HOOK: ${{ secrets.DOKPLOY_DEPLOY_HOOK }}
|
||||
@ -54,3 +57,15 @@ jobs:
|
||||
curl -fsS -X POST "$DOKPLOY_DEPLOY_HOOK" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"imageTag\":\"$IMAGE_TAG\"}"
|
||||
|
||||
- name: Wait for Ready Health Check
|
||||
env:
|
||||
HEALTH_URL: ${{ secrets.DOKPLOY_HEALTHCHECK_URL }}
|
||||
MAX_ATTEMPTS: "30"
|
||||
SLEEP_SECONDS: "10"
|
||||
run: |
|
||||
if [ -z "$HEALTH_URL" ]; then
|
||||
echo "Missing DOKPLOY_HEALTHCHECK_URL secret"
|
||||
exit 1
|
||||
fi
|
||||
bash scripts/wait-for-health.sh
|
||||
|
||||
@ -113,6 +113,16 @@ Primary outcomes:
|
||||
- Added operational smoke tooling for deploy/rollback validation:
|
||||
- `scripts/smoke-public-launch.sh` checks health endpoints, `X-Request-Id`, and `request_id` response fields.
|
||||
- Expanded `docs/public-launch-runbook.md` with deployment smoke and rollback checklist sections.
|
||||
- Added host hardening and DR drill scripts:
|
||||
- `scripts/harden-host-ufw.sh` (UFW baseline with dry-run default).
|
||||
- `scripts/check-host-security.sh` (ports/firewall/fail2ban/docker status snapshot).
|
||||
- `scripts/restore-drill-postgres.sh` (restore + validation query workflow).
|
||||
- Updated docs to use executable operational checks in:
|
||||
- `docs/public-launch-runbook.md`
|
||||
- `docs/06_SECURITY_REVIEW.md`
|
||||
- Added deploy health gate automation:
|
||||
- `scripts/wait-for-health.sh` polls ready endpoint and verifies `request_id` payload.
|
||||
- `.gitea/workflows/deploy-dokploy.yml` now runs post-deploy health verification using `DOKPLOY_HEALTHCHECK_URL`.
|
||||
|
||||
### Risks / Notes to Revisit
|
||||
- Workspace is intentionally dirty; commits must be path-scoped to avoid mixing unrelated changes.
|
||||
|
||||
@ -66,6 +66,6 @@ This document tracks launch-critical security findings for app, data, users, and
|
||||
- [x] `npm run lint` passes (warnings acceptable for now).
|
||||
- [x] `npm test` passes.
|
||||
- [x] `npm run build` passes.
|
||||
- [ ] Production host firewall rules verified.
|
||||
- [ ] Production host firewall rules verified (`scripts/harden-host-ufw.sh` + `scripts/check-host-security.sh`).
|
||||
- [ ] SSH restricted to VPN/allowlist.
|
||||
- [ ] Backup restore drill logged for current week.
|
||||
- [ ] Backup restore drill logged for current week (`scripts/restore-drill-postgres.sh`).
|
||||
|
||||
@ -28,6 +28,10 @@
|
||||
- `REGISTRY_USER`
|
||||
- `REGISTRY_PASS`
|
||||
- `DOKPLOY_DEPLOY_HOOK`
|
||||
- `DOKPLOY_HEALTHCHECK_URL`
|
||||
- Health gate:
|
||||
- workflow calls `scripts/wait-for-health.sh` against `DOKPLOY_HEALTHCHECK_URL`
|
||||
- default retry window: 5 minutes (30 attempts x 10s)
|
||||
|
||||
## 4) Reverse Proxy + Network Hardening
|
||||
- Use `docker/nginx/fiddy.conf` as baseline.
|
||||
@ -41,6 +45,10 @@
|
||||
- Confirm Nginx writes JSON logs:
|
||||
- `/var/log/nginx/fiddy-access.log`
|
||||
- `/var/log/nginx/fiddy-error.log`
|
||||
- Apply/verify host baseline using scripts:
|
||||
- dry-run firewall apply: `SSH_ALLOW_CIDR=<your-cidr> DRY_RUN=1 scripts/harden-host-ufw.sh`
|
||||
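- real firewall apply: `sudo env SSH_ALLOW_CIDR=<your-cidr> DRY_RUN=0 scripts/harden-host-ufw.sh` (set the variables after `sudo`; with sudo's default `env_reset`, variables placed before `sudo` do not reach the script)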
|
||||
- host status audit: `scripts/check-host-security.sh`
|
||||
|
||||
## 5) Observability
|
||||
- Bring up monitoring stack:
|
||||
@ -66,7 +74,7 @@
|
||||
- Retention:
|
||||
- default 7 days (`RETENTION_DAYS=7`)
|
||||
- Restore drill:
|
||||
- `scripts/restore-postgres.sh backups/postgres/<file>.dump <target_database_url>`
|
||||
- `scripts/restore-drill-postgres.sh backups/postgres/<file>.dump <target_database_url>`
|
||||
- Run restore drill on non-prod DB before public launch.
|
||||
|
||||
## 7) Incident Response Quick Flow
|
||||
|
||||
43
scripts/check-host-security.sh
Normal file
43
scripts/check-host-security.sh
Normal file
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env bash
#
# check-host-security.sh - print a read-only snapshot of the host's exposed
# surface: listening sockets, UFW status, fail2ban status and running Docker
# containers, followed by the expected baseline to compare against.
#
# Usage: scripts/check-host-security.sh
#
# Every probe is best-effort: a missing tool is reported and a failing command
# is ignored, so the remaining sections always run.
set -euo pipefail

#######################################
# Print one section of the report.
# Arguments: $1   - section heading
#            $2   - tool to run (looked up on PATH)
#            $3.. - arguments passed to the tool
# Outputs:   the heading, the tool's output (or "<tool> command not found"),
#            then a blank separator line, all on stdout
# Returns:   0 even when the tool itself fails
#######################################
report_section() {
  local heading=$1
  local tool=$2
  shift 2

  echo "== ${heading} =="
  if command -v "$tool" >/dev/null 2>&1; then
    # Deliberately best-effort: one failing probe must not abort the snapshot.
    "$tool" "$@" || true
  else
    echo "${tool} command not found"
  fi
  echo
}

echo "== Host Security Check =="
echo "Timestamp (UTC): $(date -u +%Y-%m-%dT%H:%M:%SZ)"
echo

# Without root some probes print an error or partial data, and the `|| true`
# guards hide that. Warn on stderr so the stdout snapshot format is unchanged.
if [[ "${EUID}" -ne 0 ]]; then
  echo "warning: not running as root; ufw/fail2ban status and process names in the port list may be missing or incomplete." >&2
fi

report_section "Listening TCP/UDP Ports" ss -tulpn
report_section "UFW Status" ufw status verbose
report_section "Fail2ban Status" fail2ban-client status
report_section "Docker Containers" \
  docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}'

echo "== Quick Expected Surface =="
echo "- Public inbound expected: 80/tcp, 443/tcp only"
echo "- SSH expected: restricted source allowlist or VPN-only"
echo "- Postgres expected: private-only, not internet-exposed"
|
||||
35
scripts/harden-host-ufw.sh
Normal file
35
scripts/harden-host-ufw.sh
Normal file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
#
# harden-host-ufw.sh - apply a baseline UFW policy: deny inbound by default,
# allow outbound, allow SSH (22/tcp) only from SSH_ALLOW_CIDR, and allow
# 80/tcp and 443/tcp.
#
# Environment:
#   SSH_ALLOW_CIDR  (required) source CIDR allowed to reach port 22,
#                   e.g. 203.0.113.10/32
#   DRY_RUN         1 (default) prints the ufw commands; 0 executes them
set -euo pipefail

if ! command -v ufw >/dev/null 2>&1; then
  echo "ufw is not installed on this host." >&2
  exit 1
fi

SSH_ALLOW_CIDR="${SSH_ALLOW_CIDR:-}"
DRY_RUN="${DRY_RUN:-1}"

if [[ -z "$SSH_ALLOW_CIDR" ]]; then
  echo "SSH_ALLOW_CIDR is required (example: SSH_ALLOW_CIDR=203.0.113.10/32)." >&2
  exit 1
fi

# This script resets the firewall, so refuse ambiguous values such as
# DRY_RUN=true or DRY_RUN=yes instead of guessing which mode was intended.
if [[ "$DRY_RUN" != "0" && "$DRY_RUN" != "1" ]]; then
  echo "DRY_RUN must be 0 or 1 (got: $DRY_RUN)." >&2
  exit 1
fi
|
||||
|
||||
#######################################
# Run a command, or only print it when in dry-run mode.
# Globals:   DRY_RUN (read) - "0" executes the command; any other value
#            (including unset) only prints it.
# Arguments: the command followed by its arguments
# Outputs:   in dry-run mode, "+ <command line>" on stdout
# Returns:   the command's exit status when executed, 0 in dry-run mode
#######################################
run_cmd() {
  # Fail safe: the commands passed here rewrite the firewall, so only an
  # explicit DRY_RUN=0 executes them. A typo such as DRY_RUN=true must not
  # silently turn into a real run.
  if [[ "${DRY_RUN:-1}" == "0" ]]; then
    "$@"
  else
    echo "+ $*"
  fi
}
|
||||
|
||||
# Baseline rule set, applied in order through run_cmd: start from a clean
# slate, deny inbound / allow outbound by default, open SSH only to
# SSH_ALLOW_CIDR, open the public web ports, then enable UFW and show the
# resulting state.
apply_ufw_baseline() {
  run_cmd ufw --force reset
  run_cmd ufw default deny incoming
  run_cmd ufw default allow outgoing
  run_cmd ufw allow from "$SSH_ALLOW_CIDR" to any port 22 proto tcp
  run_cmd ufw allow 80/tcp
  run_cmd ufw allow 443/tcp
  run_cmd ufw --force enable
  run_cmd ufw status verbose
}

printf 'Applying UFW baseline policy (DRY_RUN=%s)...\n' "$DRY_RUN"
apply_ufw_baseline

printf 'Done.\n'
|
||||
34
scripts/restore-drill-postgres.sh
Normal file
34
scripts/restore-drill-postgres.sh
Normal file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
#
# restore-drill-postgres.sh - restore a backup into a target database by
# calling restore-postgres.sh, then run validation queries against the result.
#
# Usage: restore-drill-postgres.sh <backup.dump> <target_database_url>
#
# Exits non-zero if the arguments, the backup file, psql or the restore script
# are missing, if the restore fails, or if any validation query fails.
set -euo pipefail

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <backup.dump> <target_database_url>" >&2
  exit 1
fi

BACKUP_FILE="$1"
TARGET_DATABASE_URL="$2"

# Resolve the sibling restore script from this file's own location so the
# drill also works when invoked from outside the repository root.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
RESTORE_SCRIPT="${SCRIPT_DIR}/restore-postgres.sh"

if [[ ! -f "$BACKUP_FILE" ]]; then
  echo "Backup file not found: $BACKUP_FILE" >&2
  exit 1
fi

if ! command -v psql >/dev/null 2>&1; then
  echo "psql command not found." >&2
  exit 1
fi

if [[ ! -f "$RESTORE_SCRIPT" ]]; then
  echo "Restore script not found: $RESTORE_SCRIPT" >&2
  exit 1
fi

echo "Running restore drill..."
bash "$RESTORE_SCRIPT" "$BACKUP_FILE" "$TARGET_DATABASE_URL"

echo "Running post-restore validation queries..."
# ON_ERROR_STOP makes psql exit non-zero on the first failing statement, so a
# missing `users` table fails the drill instead of being printed and ignored.
# The timestamp is converted explicitly because now() is rendered in the
# session time zone, which need not be UTC despite the column label.
psql "$TARGET_DATABASE_URL" -v ON_ERROR_STOP=1 <<'SQL'
select now() at time zone 'utc' as restore_checked_at_utc;
select count(*) as public_tables
from information_schema.tables
where table_schema='public';
select count(*) as users_count from users;
SQL

echo "Restore drill completed successfully."
|
||||
31
scripts/wait-for-health.sh
Normal file
31
scripts/wait-for-health.sh
Normal file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
#
# wait-for-health.sh - poll a readiness endpoint until it answers HTTP 200 with
# a body containing "request_id", or give up after MAX_ATTEMPTS attempts.
#
# Usage: HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh
#    or: scripts/wait-for-health.sh https://your-domain/api/health/ready
#
# Environment:
#   HEALTH_URL       endpoint to poll (the first argument takes precedence)
#   MAX_ATTEMPTS     number of attempts, positive integer (default 30)
#   SLEEP_SECONDS    pause between attempts (default 10)
#   CONNECT_TIMEOUT  per-attempt curl --connect-timeout in seconds (default 5)
#   REQUEST_TIMEOUT  per-attempt curl --max-time in seconds (default 15)
#
# Exit status: 0 once the endpoint is healthy, 1 on bad input or when every
# attempt failed.
set -euo pipefail

HEALTH_URL="${1:-${HEALTH_URL:-}}"
MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"
SLEEP_SECONDS="${SLEEP_SECONDS:-10}"
CONNECT_TIMEOUT="${CONNECT_TIMEOUT:-5}"
REQUEST_TIMEOUT="${REQUEST_TIMEOUT:-15}"

if [[ -z "$HEALTH_URL" ]]; then
  echo "Usage: HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh" >&2
  echo "   or: scripts/wait-for-health.sh https://your-domain/api/health/ready" >&2
  exit 1
fi

# 10# forces base 10 so a value such as 08 is not parsed as invalid octal.
if ! [[ "$MAX_ATTEMPTS" =~ ^[0-9]+$ ]] || ((10#$MAX_ATTEMPTS < 1)); then
  echo "MAX_ATTEMPTS must be a positive integer (got: $MAX_ATTEMPTS)." >&2
  exit 1
fi
MAX_ATTEMPTS=$((10#$MAX_ATTEMPTS))

if ! command -v curl >/dev/null 2>&1; then
  echo "curl command not found." >&2
  exit 1
fi

tmp_body="$(mktemp)"
trap 'rm -f "$tmp_body"' EXIT

for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
  # Start from an empty body so a request that fails before writing anything
  # cannot leave an earlier attempt's response behind.
  : >"$tmp_body"

  # Timeouts bound each attempt; without them one hung connection would stall
  # the gate indefinitely. `|| true` keeps set -e from aborting on a failed
  # request; curl still reports the status it saw (000 when there was none).
  status="$(curl -sS \
    --connect-timeout "$CONNECT_TIMEOUT" \
    --max-time "$REQUEST_TIMEOUT" \
    -o "$tmp_body" -w "%{http_code}" "$HEALTH_URL" || true)"

  if [[ "$status" == "200" ]] && grep -q '"request_id"' "$tmp_body"; then
    echo "Health check passed on attempt $attempt/$MAX_ATTEMPTS"
    exit 0
  fi

  # Sleeping after the final attempt would only delay the failure report.
  if ((attempt < MAX_ATTEMPTS)); then
    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status), retrying in ${SLEEP_SECONDS}s..."
    sleep "$SLEEP_SECONDS"
  else
    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status)."
  fi
done

{
  echo "Health check failed after $MAX_ATTEMPTS attempts."
  echo "Last response body:"
  cat "$tmp_body" || true
} >&2
exit 1
|
||||
Loading…
Reference in New Issue
Block a user