#!/usr/bin/env bash
# Doc accuracy guard.
#
# Greps the README and the doc files listed in `DOCS` for
# `/api/v1/<...>` paths and compares them against the OpenAPI
# document. The matcher templatizes literal IDs, IPs, and job trigger
# names so `/api/v1/admin/ips/{ip}` in the spec covers
# `/api/v1/admin/ips/203.0.113.42` in prose.
#
# Also ensures every token kind and role name appears at least once
# in the doc set — lazy proxy for "the docs cover the auth model".
#
# Allowlisted paths (declared in `KNOWN_OK`) are accepted without a
# spec entry — for things like `/api/v1/openapi.yaml` itself, which
# is a public asset, not a typed API endpoint.
#
# Usage (the script cds to the repo root itself):
#   ./scripts/check-doc-endpoints.sh
#
# Exit status: 0 when every check passes, 1 otherwise. Every `[fail]`
# diagnostic is written to stderr.

set -euo pipefail

# Print a `[fail]` diagnostic on stderr and abort with status 1.
# Only call this at top level: inside `$(...)` it would merely end
# the subshell.
fail() {
  printf '[fail] %s\n' "$*" >&2
  exit 1
}

REPO_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
readonly REPO_ROOT
cd -- "$REPO_ROOT"

readonly SPEC="api/public/openapi.yaml"
[[ -s "$SPEC" ]] || fail "$SPEC missing or empty — run composer openapi:build"

readonly DOCS=(
  README.md
  doc/architecture.md
  doc/api-overview.md
  doc/auth-flows.md
  doc/frontend-development.md
  doc/api-reference.md
)

# A missing doc file makes grep exit 2; under `set -e` + `pipefail`
# that would end the script without any message, so check up front.
for doc in "${DOCS[@]}"; do
  [[ -r "$doc" ]] || fail "doc file $doc missing or unreadable"
done

# Paths declared in docs but intentionally outside the spec.
KNOWN_OK=$(
  cat <<'EOF'
/api/v1/openapi.yaml
/api/v1/auth/oauth
/api/v1/auth/oauth/start
/api/v1/auth/oauth/callback
/api/v1/auth/oauth/refresh
/api/v1/auth/oauth/revoke
EOF
)
readonly KNOWN_OK

#######################################
# Rewrite literal path segments into OpenAPI placeholders.
# IPv4 / IPv6 segments become {ip}, bare integers become {id}, and the
# segment after /jobs/trigger/ becomes {name}. Category slugs are left
# untouched.
# Arguments: none
# Inputs:    one path per line on stdin
# Outputs:   the templatized paths on stdout
#######################################
templatize() {
  # Each `(/|$)` tail consumes the separator, so a single `g` pass
  # skips the second of two adjacent matches (`/1/2`). The `t` loops
  # repeat a rule until it stops changing the line.
  sed -E '
    # IPv4-shaped segment
    :ipv4
    s#/[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/|$)#/{ip}\1#g
    t ipv4
    # IPv6 (rough): hexes with at least one colon
    :ipv6
    s#/[0-9a-fA-F:]*:[0-9a-fA-F:]+(/|$)#/{ip}\1#g
    t ipv6
    # Bare integer segment → {id}
    :id
    s#/[0-9]+(/|$)#/{id}\1#g
    t id
    # /jobs/trigger/<job> → /jobs/trigger/{name}
    s#/jobs/trigger/[A-Za-z0-9_-]+#/jobs/trigger/{name}#g
  '
}

# ---- Pull doc paths ----
# Match `/api/v1/...` followed by allowed chars; the character class
# stops at quotes, whitespace and the closing punctuation markdown
# wraps paths in.
#
# grep exits 1 when nothing matches (not an error here) and 2 or more
# on a real failure. The status is handled by hand so that `set -e`
# cannot abort the script without a diagnostic.
raw_paths=$(grep -hoE '/api/v1/[A-Za-z0-9_/{}.:-]+' -- "${DOCS[@]}") || {
  rc=$?
  ((rc == 1)) || fail "grep over the doc set failed (exit $rc)"
  raw_paths=""
}

# Strip query strings, trailing punctuation and trailing slashes, then
# drop blank lines and the bare prefixes (`/api/v1`, `/api/v1/admin`,
# `/api/v1/auth`), which are prose references rather than endpoints.
# The deletions are done in sed because `grep -v` exits 1 when it
# prints nothing, which would trip `pipefail`.
docs_paths=$(
  printf '%s\n' "$raw_paths" \
    | sed -E \
      -e 's/[?#].*$//' \
      -e 's/[.,;:`)]+$//' \
      -e 's#/+$##' \
      -e '/^$/d' \
      -e '/^\/api\/v1(\/(admin|auth))?$/d' \
    | templatize \
    | sort -u
)

# ---- Pull spec paths ----
# OpenAPI YAML: every line at exactly two-space indent under `paths:`
# whose key starts with `/api/v1` is an endpoint. The section ends at
# the next top-level key. Surrounding quotes on a key are stripped
# (\047 is the single quote).
spec_paths=$(
  awk '
    /^paths:/ { in_paths = 1; next }
    in_paths && /^[A-Za-z]/ { in_paths = 0 }
    in_paths && /^  [^ ]/ {
      key = $0
      sub(/^  /, "", key)
      # Trim whitespace (including a CR) before removing the colon, so
      # a key followed by trailing blanks still loses its colon.
      gsub(/^[ \t]+|[ \t\r]+$/, "", key)
      sub(/:$/, "", key)
      gsub(/^["\047]|["\047]$/, "", key)
      if (key ~ /^\/api\/v1/) print key
    }
  ' "$SPEC" | sort -u
)

# ---- Compare ----
# Set difference in one awk pass: the first input holds the accepted
# paths (allowlist + spec), the second the doc paths. Both sides are
# already templatized ({ip}/{id}/{name}), so exact string comparison
# is enough.
missing=$(
  awk '
    FNR == 1 { input++ }
    $0 == "" { next }
    input == 1 { accepted[$0] = 1; next }
    !($0 in accepted)
  ' <(printf '%s\n%s\n' "$KNOWN_OK" "$spec_paths") \
    <(printf '%s\n' "$docs_paths")
)

if [[ -n "$missing" ]]; then
  {
    printf '[fail] doc-mentioned paths not present in %s:\n' "$SPEC"
    printf '%s\n' "$missing" | sed 's/^/ - /'
  } >&2
  exit 1
fi

# ---- Token-kind / role spelling ----
# Each word must appear as a whole word somewhere in the doc set.
for word in reporter consumer admin service viewer operator; do
  grep -qwF -- "$word" "${DOCS[@]}" || fail "doc set never mentions '$word'"
done

# `grep -c` exits 1 on a zero count; `|| true` keeps that from
# tripping `set -e`.
docs_count=$(grep -c . <<<"$docs_paths" || true)
spec_count=$(grep -c . <<<"$spec_paths" || true)
printf '[ok] docs reference %s /api/v1/* paths; spec declares %s\n' \
  "$docs_count" "$spec_count"