add deploy health gate and host security ops scripts
This commit is contained in:
parent
4d09d7e5b4
commit
fa3129bb1a
@ -42,6 +42,9 @@ jobs:
|
|||||||
needs: build
|
needs: build
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
- name: Checkout repo
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
- name: Trigger Dokploy Deploy
|
- name: Trigger Dokploy Deploy
|
||||||
env:
|
env:
|
||||||
DOKPLOY_DEPLOY_HOOK: ${{ secrets.DOKPLOY_DEPLOY_HOOK }}
|
DOKPLOY_DEPLOY_HOOK: ${{ secrets.DOKPLOY_DEPLOY_HOOK }}
|
||||||
@ -54,3 +57,15 @@ jobs:
|
|||||||
curl -fsS -X POST "$DOKPLOY_DEPLOY_HOOK" \
|
curl -fsS -X POST "$DOKPLOY_DEPLOY_HOOK" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d "{\"imageTag\":\"$IMAGE_TAG\"}"
|
-d "{\"imageTag\":\"$IMAGE_TAG\"}"
|
||||||
|
|
||||||
|
- name: Wait for Ready Health Check
|
||||||
|
env:
|
||||||
|
HEALTH_URL: ${{ secrets.DOKPLOY_HEALTHCHECK_URL }}
|
||||||
|
MAX_ATTEMPTS: "30"
|
||||||
|
SLEEP_SECONDS: "10"
|
||||||
|
run: |
|
||||||
|
if [ -z "$HEALTH_URL" ]; then
|
||||||
|
echo "Missing DOKPLOY_HEALTHCHECK_URL secret"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
bash scripts/wait-for-health.sh
|
||||||
|
|||||||
@ -113,6 +113,16 @@ Primary outcomes:
|
|||||||
- Added operational smoke tooling for deploy/rollback validation:
|
- Added operational smoke tooling for deploy/rollback validation:
|
||||||
- `scripts/smoke-public-launch.sh` checks health endpoints, `X-Request-Id`, and `request_id` response fields.
|
- `scripts/smoke-public-launch.sh` checks health endpoints, `X-Request-Id`, and `request_id` response fields.
|
||||||
- Expanded `docs/public-launch-runbook.md` with deployment smoke and rollback checklist sections.
|
- Expanded `docs/public-launch-runbook.md` with deployment smoke and rollback checklist sections.
|
||||||
|
- Added host hardening and DR drill scripts:
|
||||||
|
- `scripts/harden-host-ufw.sh` (UFW baseline with dry-run default).
|
||||||
|
- `scripts/check-host-security.sh` (ports/firewall/fail2ban/docker status snapshot).
|
||||||
|
- `scripts/restore-drill-postgres.sh` (restore + validation query workflow).
|
||||||
|
- Updated docs to use executable operational checks in:
|
||||||
|
- `docs/public-launch-runbook.md`
|
||||||
|
- `docs/06_SECURITY_REVIEW.md`
|
||||||
|
- Added deploy health gate automation:
|
||||||
|
- `scripts/wait-for-health.sh` polls ready endpoint and verifies `request_id` payload.
|
||||||
|
- `.gitea/workflows/deploy-dokploy.yml` now runs post-deploy health verification using `DOKPLOY_HEALTHCHECK_URL`.
|
||||||
|
|
||||||
### Risks / Notes to Revisit
|
### Risks / Notes to Revisit
|
||||||
- Workspace is intentionally dirty; commits must be path-scoped to avoid mixing unrelated changes.
|
- Workspace is intentionally dirty; commits must be path-scoped to avoid mixing unrelated changes.
|
||||||
|
|||||||
@ -66,6 +66,6 @@ This document tracks launch-critical security findings for app, data, users, and
|
|||||||
- [x] `npm run lint` passes (warnings acceptable for now).
|
- [x] `npm run lint` passes (warnings acceptable for now).
|
||||||
- [x] `npm test` passes.
|
- [x] `npm test` passes.
|
||||||
- [x] `npm run build` passes.
|
- [x] `npm run build` passes.
|
||||||
- [ ] Production host firewall rules verified.
|
- [ ] Production host firewall rules verified (`scripts/harden-host-ufw.sh` + `scripts/check-host-security.sh`).
|
||||||
- [ ] SSH restricted to VPN/allowlist.
|
- [ ] SSH restricted to VPN/allowlist.
|
||||||
- [ ] Backup restore drill logged for current week.
|
- [ ] Backup restore drill logged for current week (`scripts/restore-drill-postgres.sh`).
|
||||||
|
|||||||
@ -28,6 +28,10 @@
|
|||||||
- `REGISTRY_USER`
|
- `REGISTRY_USER`
|
||||||
- `REGISTRY_PASS`
|
- `REGISTRY_PASS`
|
||||||
- `DOKPLOY_DEPLOY_HOOK`
|
- `DOKPLOY_DEPLOY_HOOK`
|
||||||
|
- `DOKPLOY_HEALTHCHECK_URL`
|
||||||
|
- Health gate:
|
||||||
|
- workflow calls `scripts/wait-for-health.sh` against `DOKPLOY_HEALTHCHECK_URL`
|
||||||
|
- default retry window: 5 minutes (30 attempts x 10s)
|
||||||
|
|
||||||
## 4) Reverse Proxy + Network Hardening
|
## 4) Reverse Proxy + Network Hardening
|
||||||
- Use `docker/nginx/fiddy.conf` as baseline.
|
- Use `docker/nginx/fiddy.conf` as baseline.
|
||||||
@ -41,6 +45,10 @@
|
|||||||
- Confirm Nginx writes JSON logs:
|
- Confirm Nginx writes JSON logs:
|
||||||
- `/var/log/nginx/fiddy-access.log`
|
- `/var/log/nginx/fiddy-access.log`
|
||||||
- `/var/log/nginx/fiddy-error.log`
|
- `/var/log/nginx/fiddy-error.log`
|
||||||
|
- Apply/verify host baseline using scripts:
|
||||||
|
- dry-run firewall apply: `SSH_ALLOW_CIDR=<your-cidr> DRY_RUN=1 scripts/harden-host-ufw.sh`
|
||||||
|
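- real firewall apply: `sudo env SSH_ALLOW_CIDR=<your-cidr> DRY_RUN=0 scripts/harden-host-ufw.sh` (pass the variables through `sudo env`; variables set before `sudo` are dropped by its default `env_reset` policy)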
|
||||||
|
- host status audit: `scripts/check-host-security.sh`
|
||||||
|
|
||||||
## 5) Observability
|
## 5) Observability
|
||||||
- Bring up monitoring stack:
|
- Bring up monitoring stack:
|
||||||
@ -66,7 +74,7 @@
|
|||||||
- Retention:
|
- Retention:
|
||||||
- default 7 days (`RETENTION_DAYS=7`)
|
- default 7 days (`RETENTION_DAYS=7`)
|
||||||
- Restore drill:
|
- Restore drill:
|
||||||
- `scripts/restore-postgres.sh backups/postgres/<file>.dump <target_database_url>`
|
- `scripts/restore-drill-postgres.sh backups/postgres/<file>.dump <target_database_url>`
|
||||||
- Run restore drill on non-prod DB before public launch.
|
- Run restore drill on non-prod DB before public launch.
|
||||||
|
|
||||||
## 7) Incident Response Quick Flow
|
## 7) Incident Response Quick Flow
|
||||||
|
|||||||
43
scripts/check-host-security.sh
Normal file
43
scripts/check-host-security.sh
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env bash
# Print a read-only snapshot of the host's security-relevant state:
# listening sockets, UFW status, fail2ban status and running Docker
# containers, followed by a reminder of the expected exposure.
#
# Every probe is best-effort: a missing tool or a failing command is
# reported (or ignored) and the script keeps going to the next section.
set -euo pipefail

#######################################
# Print one titled report section.
# Arguments:
#   $1    - section title (printed between "== ... ==")
#   $2... - command to run; $2 is also the tool name that must be on PATH
# Outputs:
#   The command's output, or "<tool> command not found", then a blank line.
#######################################
report_section() {
  local heading="$1"
  shift
  local tool="$1"

  printf '== %s ==\n' "$heading"
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf '%s command not found\n' "$tool"
  else
    # Deliberately tolerate failures (e.g. not running as root) so the
    # remaining sections are still printed.
    "$@" || true
  fi
  printf '\n'
}

printf '%s\n' "== Host Security Check =="
printf 'Timestamp (UTC): %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
printf '\n'

report_section "Listening TCP/UDP Ports" ss -tulpn
report_section "UFW Status" ufw status verbose
report_section "Fail2ban Status" fail2ban-client status
report_section "Docker Containers" \
  docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}'

printf '%s\n' \
  "== Quick Expected Surface ==" \
  "- Public inbound expected: 80/tcp, 443/tcp only" \
  "- SSH expected: restricted source allowlist or VPN-only" \
  "- Postgres expected: private-only, not internet-exposed"
|
||||||
35
scripts/harden-host-ufw.sh
Normal file
35
scripts/harden-host-ufw.sh
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/env bash
# Apply a baseline UFW policy: deny incoming, allow outgoing, allow SSH only
# from SSH_ALLOW_CIDR, allow 80/tcp and 443/tcp.
#
# Environment:
#   SSH_ALLOW_CIDR  (required) IPv4/IPv6 address or CIDR allowed to reach 22/tcp.
#   DRY_RUN         "1" (default) prints the commands; "0" executes them.
#                   Any other value is rejected so that a typo such as
#                   DRY_RUN=true can never trigger a real firewall reset.
#
# Exit status: 0 on success, 1 on any validation or ufw failure.
set -euo pipefail

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Check that a value looks like an IPv4/IPv6 address with an optional
# prefix length. This runs BEFORE the destructive "ufw reset": if ufw
# rejected the value only after the reset, set -e would abort the script
# with the firewall reset and disabled.
# Arguments: $1 - candidate address or CIDR
# Returns:   0 if the value is plausible, 1 otherwise
#######################################
is_valid_source() {
  local value="$1"
  local addr="${value%%/*}"
  local prefix=""
  local octet
  local -a octets
  local ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
  local ipv6_re='^[0-9A-Fa-f:.]+$'

  if [[ "$value" == */* ]]; then
    prefix="${value#*/}"
    [[ "$prefix" =~ ^[0-9]{1,3}$ ]] || return 1
  fi

  if [[ "$addr" =~ $ipv4_re ]]; then
    IFS=. read -r -a octets <<<"$addr"
    for octet in "${octets[@]}"; do
      # 10# forces base 10 so octets like "08" are not parsed as octal.
      ((10#$octet <= 255)) || return 1
    done
    [[ -z "$prefix" ]] || ((10#$prefix <= 32)) || return 1
    return 0
  fi

  if [[ "$addr" == *:* && "$addr" =~ $ipv6_re ]]; then
    [[ -z "$prefix" ]] || ((10#$prefix <= 128)) || return 1
    return 0
  fi

  return 1
}

# Echo the command in dry-run mode, execute it otherwise.
run_cmd() {
  if [[ "$DRY_RUN" == "1" ]]; then
    echo "+ $*"
  else
    "$@"
  fi
}

if ! command -v ufw >/dev/null 2>&1; then
  die "ufw is not installed on this host."
fi

SSH_ALLOW_CIDR="${SSH_ALLOW_CIDR:-}"
DRY_RUN="${DRY_RUN:-1}"

if [[ -z "$SSH_ALLOW_CIDR" ]]; then
  die "SSH_ALLOW_CIDR is required (example: SSH_ALLOW_CIDR=203.0.113.10/32)."
fi

if ! is_valid_source "$SSH_ALLOW_CIDR"; then
  die "SSH_ALLOW_CIDR is not a valid address or CIDR: $SSH_ALLOW_CIDR"
fi

case "$DRY_RUN" in
  0 | 1) ;;
  *) die "DRY_RUN must be 0 (apply) or 1 (print only), got: $DRY_RUN" ;;
esac

# Fail early with a clear message instead of letting the first ufw call fail.
if [[ "$DRY_RUN" == "0" ]] && ((EUID != 0)); then
  die "Applying the firewall policy requires root (DRY_RUN=0)."
fi

echo "Applying UFW baseline policy (DRY_RUN=$DRY_RUN)..."
run_cmd ufw --force reset
run_cmd ufw default deny incoming
run_cmd ufw default allow outgoing
run_cmd ufw allow from "$SSH_ALLOW_CIDR" to any port 22 proto tcp
run_cmd ufw allow 80/tcp
run_cmd ufw allow 443/tcp
run_cmd ufw --force enable
run_cmd ufw status verbose

echo "Done."
|
||||||
34
scripts/restore-drill-postgres.sh
Normal file
34
scripts/restore-drill-postgres.sh
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#!/usr/bin/env bash
# Restore drill: restore a Postgres dump into a target database via
# restore-postgres.sh (expected next to this script), then run a few
# validation queries against the restored database.
#
# Usage: restore-drill-postgres.sh <backup.dump> <target_database_url>
#
# Exit status: 0 when the restore and all validation queries succeed,
# non-zero otherwise (psql runs with ON_ERROR_STOP=1).
set -euo pipefail

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -lt 2 ]]; then
  die "Usage: $0 <backup.dump> <target_database_url>"
fi

BACKUP_FILE="$1"
TARGET_DATABASE_URL="$2"

# Locate the restore helper relative to this script rather than the caller's
# working directory, so the drill works from any directory.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
restore_script="${script_dir}/restore-postgres.sh"

if [[ ! -f "$BACKUP_FILE" ]]; then
  die "Backup file not found: $BACKUP_FILE"
fi

if [[ ! -f "$restore_script" ]]; then
  die "Restore script not found: $restore_script"
fi

if ! command -v psql >/dev/null 2>&1; then
  die "psql command not found."
fi

echo "Running restore drill..."
bash "$restore_script" "$BACKUP_FILE" "$TARGET_DATABASE_URL"

echo "Running post-restore validation queries..."
# Quoted heredoc delimiter: the SQL is passed to psql literally, with no
# shell expansion.
psql "$TARGET_DATABASE_URL" -v ON_ERROR_STOP=1 <<'SQL'
select now() as restore_checked_at_utc;
select count(*) as public_tables
from information_schema.tables
where table_schema='public';
select count(*) as users_count from users;
SQL

echo "Restore drill completed successfully."
|
||||||
31
scripts/wait-for-health.sh
Normal file
31
scripts/wait-for-health.sh
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
# Poll a readiness endpoint until it answers HTTP 200 with a body that
# contains a "request_id" field, or give up after MAX_ATTEMPTS tries.
#
# Usage:
#   HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh
#   scripts/wait-for-health.sh https://your-domain/api/health/ready
#
# Environment:
#   HEALTH_URL               endpoint to poll (overridden by $1 when given)
#   MAX_ATTEMPTS             positive integer, default 30
#   SLEEP_SECONDS            pause between attempts, default 10
#   REQUEST_TIMEOUT_SECONDS  connect and total timeout per request, default 10
#
# Exit status: 0 once the endpoint is healthy, 1 on bad input or timeout.
set -euo pipefail

HEALTH_URL="${1:-${HEALTH_URL:-}}"
MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"
SLEEP_SECONDS="${SLEEP_SECONDS:-10}"
REQUEST_TIMEOUT_SECONDS="${REQUEST_TIMEOUT_SECONDS:-10}"

if [[ -z "$HEALTH_URL" ]]; then
  echo "Usage: HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh" >&2
  echo "   or: scripts/wait-for-health.sh https://your-domain/api/health/ready" >&2
  exit 1
fi

if [[ ! "$MAX_ATTEMPTS" =~ ^[1-9][0-9]*$ ]]; then
  echo "MAX_ATTEMPTS must be a positive integer, got: $MAX_ATTEMPTS" >&2
  exit 1
fi

tmp_body="$(mktemp)"
trap 'rm -f "$tmp_body"' EXIT

status="000"
for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
  # Clear the previous body so a failed connection can never leave a stale
  # response behind to be matched or printed as the "last response".
  : >"$tmp_body"

  # Bounded timeouts keep a hung connection from stalling the deploy gate.
  # "|| true": a transport failure is a failed attempt, not a fatal error;
  # curl still prints 000 as the status code in that case.
  status="$(curl -sS \
    --connect-timeout "$REQUEST_TIMEOUT_SECONDS" \
    --max-time "$REQUEST_TIMEOUT_SECONDS" \
    -o "$tmp_body" -w '%{http_code}' "$HEALTH_URL" || true)"

  if [[ "$status" == "200" ]] && grep -q '"request_id"' "$tmp_body"; then
    echo "Health check passed on attempt $attempt/$MAX_ATTEMPTS"
    exit 0
  fi

  if ((attempt < MAX_ATTEMPTS)); then
    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status), retrying in ${SLEEP_SECONDS}s..."
    sleep "$SLEEP_SECONDS"
  else
    # No point sleeping after the final attempt.
    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status)."
  fi
done

echo "Health check failed after $MAX_ATTEMPTS attempts." >&2
echo "Last response body:" >&2
cat "$tmp_body" >&2 || true
exit 1
|
||||||
Loading…
Reference in New Issue
Block a user