SecretScrubbingProcessor.php 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. <?php
  2. declare(strict_types=1);
  3. namespace App\Infrastructure\Logging;
  4. use Monolog\LogRecord;
  5. use Monolog\Processor\ProcessorInterface;
  /**
   * Scrubs sensitive values out of Monolog records before they hit a handler
   * (SPEC §M14.4).
   *
   * Two layers of defence:
   *   1. **Key-based redaction** in `context` and `extra`. Any key whose name
   *      contains one of the sensitive-key needles gets its value replaced
   *      with `***`, whatever the value's type (nested arrays included).
   *   2. **Pattern-based redaction** of the rendered `message` and any string
   *      values left in `context` / `extra`. Catches Bearer tokens that
   *      slipped into the message via `sprintf` or were embedded in a free-
   *      form string. The token kind prefix is preserved so triage logs
   *      ("which kind of token failed?") stay useful.
   *
   * The processor is intentionally simple: any new sensitive-shaped data we
   * find in production should be added to either the key-list or the regex
   * list with a one-line PR.
   */
  final class SecretScrubbingProcessor implements ProcessorInterface
  {
      /** Marker written in place of every redacted value. */
      private const REDACTED = '***';

      /**
       * Lower-case substrings; we redact a key if any of these appear in it
       * (the key is lower-cased before the comparison).
       * Examples that get hit:
       *   `authorization`, `Authorization`, `auth_token`,
       *   `password`, `password_hash`, `LOCAL_ADMIN_PASSWORD_HASH`,
       *   `oidc_client_secret`, `client_secret`,
       *   `maxmind_license_key`, `ipinfo_token`,
       *   `db_mysql_password`, `internal_job_token`,
       *   `ui_service_token`, `bearer`, `cookie`, `set-cookie`.
       */
      private const SENSITIVE_KEY_NEEDLES = [
          'password',
          'authorization',
          'auth_token',
          'access_token',
          'refresh_token',
          'bearer',
          'secret',
          'license_key',
          'license-key',
          'license_token',
          'ipinfo_token',
          'service_token',
          'job_token',
          'cookie',
      ];

      /**
       * Pattern → replacement pairs used on string values, applied in order.
       * The token regexes preserve the irdb prefix + kind so logs still show
       * which token kind was involved without leaking the secret half.
       *
       * Replacements are `preg_replace` replacement strings, so `$n` is a
       * backreference. A literal `$` that is followed by a digit must be
       * escaped as `\$`, otherwise it silently expands to an empty string.
       *
       * @var list<array{0: string, 1: string}>
       */
      private const VALUE_PATTERNS = [
          // Bearer header value carrying an irdb token. Replaces the secret
          // half but keeps the kind prefix as a triage breadcrumb.
          ['/(Bearer\s+irdb_(?:rep|con|adm|svc)_)[A-Z2-7]{32}/', '$1***'],
          // SEC_REVIEW F65: Bearer with any non-trivial value. The
          // floor was {20,} which let a < 20-char Bearer slip through;
          // dropped to {8,} which still excludes the common literal
          // strings without false-positive matching prose.
          // The negative lookahead skips values the previous pattern has
          // already reduced to `irdb_<kind>_***`: without it the 9-character
          // `irdb_<kind>_` prefix satisfies {8,} on its own and the kind
          // breadcrumb is overwritten (`Bearer ******`). An irdb-looking
          // value that was NOT redacted above has no `***` suffix, so it
          // still falls through to this rule and is scrubbed whole.
          ['/(Bearer\s+)(?!irdb_(?:rep|con|adm|svc)_\*{3})[A-Za-z0-9._\-]{8,}/', '$1***'],
          // SEC_REVIEW F65: raw JWT (`header.payload.signature`)
          // anywhere in the message or value. Anchored on `eyJ`
          // because every JWT header is the base64url encoding of a
          // JSON object that starts with `{"…`, which is `eyJ…`.
          // Anchoring eliminates false positives like `192.168.1.1`
          // or `lib.so.6` — those don't start with `eyJ`. Each
          // segment requires ≥4 chars to skip pathological short
          // matches. The replacement keeps the `eyJ` prefix as a
          // triage breadcrumb.
          ['/\beyJ[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}\b/', 'eyJ***'],
          // Bare irdb_<kind>_<32 base32> tokens that aren't preceded by Bearer.
          ['/\birdb_(rep|con|adm|svc)_[A-Z2-7]{32}\b/', 'irdb_$1_***'],
          // Argon2 password hashes. The leading `$` of the replacement is
          // escaped so it can never be read as a backreference.
          ['/\$argon2(?:i|id|d)\$[^\s\'"]+/', '\\$argon2***'],
          // bcrypt password hashes. The `$` MUST be escaped here: an
          // unescaped `$2` is a backreference to a non-existent group 2 and
          // would expand to nothing, yielding `***` instead of `$2***`.
          ['/\$2[aby]?\$\d{2}\$[A-Za-z0-9.\/]{53}/', '\\$2***'],
      ];

      /**
       * Returns a copy of the record with `message`, `context` and `extra`
       * scrubbed. The incoming record itself is not modified.
       */
      public function __invoke(LogRecord $record): LogRecord
      {
          return $record->with(
              message: self::scrubString($record->message),
              context: self::scrubArray($record->context),
              extra: self::scrubArray($record->extra),
          );
      }

      /**
       * Recursively scrubs an array: values under sensitive string keys are
       * replaced wholesale, nested arrays are descended into, remaining string
       * values are pattern-scrubbed, and every other value is passed through
       * unchanged.
       *
       * @param array<array-key, mixed> $data
       * @return array<array-key, mixed>
       */
      private static function scrubArray(array $data): array
      {
          $out = [];

          foreach ($data as $key => $value) {
              // Only string keys can name a secret; list indices are skipped.
              if (is_string($key) && self::isSensitiveKey($key)) {
                  $out[$key] = self::REDACTED;
                  continue;
              }

              if (is_array($value)) {
                  $out[$key] = self::scrubArray($value);
              } elseif (is_string($value)) {
                  $out[$key] = self::scrubString($value);
              } else {
                  $out[$key] = $value;
              }
          }

          return $out;
      }

      /**
       * Tells whether a key name contains any of the sensitive needles,
       * compared case-insensitively.
       */
      private static function isSensitiveKey(string $key): bool
      {
          $lower = strtolower($key);

          foreach (self::SENSITIVE_KEY_NEEDLES as $needle) {
              if (str_contains($lower, $needle)) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Applies every value pattern, in declaration order, to a string.
       *
       * If PCRE reports a failure (`preg_replace` returns null) the whole value
       * is replaced by the redaction marker: the string could not be proven
       * clean, so it is withheld rather than logged raw, and the marker makes
       * the failure visible instead of leaving an empty message behind.
       */
      private static function scrubString(string $value): string
      {
          foreach (self::VALUE_PATTERNS as [$pattern, $replacement]) {
              $scrubbed = preg_replace($pattern, $replacement, $value);
              if ($scrubbed === null) {
                  return self::REDACTED;
              }

              $value = $scrubbed;
          }

          return $value;
      }
  }