| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- <?php
- declare(strict_types=1);
- namespace App\Infrastructure\Reputation;
- use App\Domain\Enrichment\EnrichmentResult;
- use App\Infrastructure\Db\RepositoryBase;
- use Doctrine\DBAL\ParameterType;
- /**
- * Read + write gateway for `ip_enrichment`.
- *
- * Originally read-only (M09); M11 grew it into the full enrichment-job
- * sink. Driver-aware UPSERT mirrors `IpScoreRepository::upsert`.
- */
- class IpEnrichmentRepository extends RepositoryBase
- {
- /**
- * @return array{country_code: ?string, asn: ?int, as_org: ?string, enriched_at: ?string}|null
- */
- public function findByIpBin(string $ipBin): ?array
- {
- $row = $this->fetchByIpBin('ip_enrichment', $ipBin);
- if ($row === null) {
- return null;
- }
- return [
- 'country_code' => $row['country_code'] !== null ? (string) $row['country_code'] : null,
- 'asn' => $row['asn'] !== null ? (int) $row['asn'] : null,
- 'as_org' => $row['as_org'] !== null ? (string) $row['as_org'] : null,
- 'enriched_at' => $row['enriched_at'] !== null ? (string) $row['enriched_at'] : null,
- ];
- }
- /**
- * Batched form of `findByIpBin`. Single SELECT with `IN (…)` over the
- * bin set, returning a map keyed by raw `ip_bin` so callers can
- * dereference per-row without an extra round-trip per IP.
- *
- * SEC_REVIEW F32: the admin IPs list previously called
- * `findByIpBin` per page row; at `page_size=200` that's 200
- * round-trips just for enrichment. Bind binary IN-list params with
- * `LARGE_OBJECT` so MySQL treats them as octets, matching `upsert`.
- *
- * @param list<string> $ipBins
- * @return array<string, array{country_code: ?string, asn: ?int, as_org: ?string, enriched_at: ?string}>
- */
- public function findByIpBins(array $ipBins): array
- {
- if ($ipBins === []) {
- return [];
- }
- $names = [];
- $params = [];
- $types = [];
- foreach ($ipBins as $i => $bin) {
- $name = 'b' . $i;
- $names[] = ':' . $name;
- $params[$name] = $bin;
- $types[$name] = ParameterType::LARGE_OBJECT;
- }
- $sql = 'SELECT ip_bin, country_code, asn, as_org, enriched_at FROM ip_enrichment '
- . 'WHERE ip_bin IN (' . implode(', ', $names) . ')';
- /** @var list<array<string, mixed>> $rows */
- $rows = $this->connection()->fetchAllAssociative($sql, $params, $types);
- $out = [];
- foreach ($rows as $row) {
- $out[(string) $row['ip_bin']] = [
- 'country_code' => $row['country_code'] !== null ? (string) $row['country_code'] : null,
- 'asn' => $row['asn'] !== null ? (int) $row['asn'] : null,
- 'as_org' => $row['as_org'] !== null ? (string) $row['as_org'] : null,
- 'enriched_at' => $row['enriched_at'] !== null ? (string) $row['enriched_at'] : null,
- ];
- }
- return $out;
- }
- /**
- * Insert or update one enrichment row. Driver-aware: SQLite uses
- * `ON CONFLICT(ip_bin) DO UPDATE`, MySQL uses `ON DUPLICATE KEY`.
- */
- public function upsert(string $ipBin, EnrichmentResult $result): void
- {
- $platform = $this->connection()->getDatabasePlatform()::class;
- $isMysql = stripos($platform, 'mysql') !== false || stripos($platform, 'mariadb') !== false;
- if ($isMysql) {
- $sql = 'INSERT INTO ip_enrichment (ip_bin, country_code, asn, as_org, enriched_at) '
- . 'VALUES (:ip_bin, :country, :asn, :as_org, :enriched_at) '
- . 'ON DUPLICATE KEY UPDATE '
- . 'country_code = VALUES(country_code), asn = VALUES(asn), '
- . 'as_org = VALUES(as_org), enriched_at = VALUES(enriched_at)';
- } else {
- $sql = 'INSERT INTO ip_enrichment (ip_bin, country_code, asn, as_org, enriched_at) '
- . 'VALUES (:ip_bin, :country, :asn, :as_org, :enriched_at) '
- . 'ON CONFLICT(ip_bin) DO UPDATE SET '
- . 'country_code = excluded.country_code, asn = excluded.asn, '
- . 'as_org = excluded.as_org, enriched_at = excluded.enriched_at';
- }
- $stmt = $this->connection()->prepare($sql);
- $stmt->bindValue('ip_bin', $ipBin, ParameterType::LARGE_OBJECT);
- $stmt->bindValue('country', $result->countryCode);
- if ($result->asn === null) {
- $stmt->bindValue('asn', null, ParameterType::NULL);
- } else {
- $stmt->bindValue('asn', $result->asn, ParameterType::INTEGER);
- }
- $stmt->bindValue('as_org', $result->asOrg);
- $stmt->bindValue('enriched_at', $result->enrichedAt->format('Y-m-d H:i:s'));
- $stmt->executeStatement();
- }
- /**
- * IPs known to the system (reports OR manual_blocks) but missing
- * from `ip_enrichment` (or whose enriched_at was cleared by
- * ?reenrich=true). Ordered by oldest first-seen so backlogs catch
- * up before newer arrivals.
- *
- * @return list<string> ip_bin values, length ≤ $limit
- */
- public function findPending(int $limit): array
- {
- $sql = <<<SQL
- SELECT t.ip_bin AS ip_bin, MIN(t.received_at) AS received_at
- FROM (
- SELECT ip_bin, received_at FROM reports
- UNION ALL
- SELECT ip_bin, created_at AS received_at FROM manual_blocks WHERE kind = 'ip' AND ip_bin IS NOT NULL
- ) t
- LEFT JOIN ip_enrichment e ON e.ip_bin = t.ip_bin AND e.enriched_at IS NOT NULL
- WHERE e.ip_bin IS NULL
- GROUP BY t.ip_bin
- ORDER BY MIN(t.received_at) ASC
- LIMIT :limit
- SQL;
- $stmt = $this->connection()->prepare($sql);
- $stmt->bindValue('limit', $limit, ParameterType::INTEGER);
- /** @var list<array<string, mixed>> $rows */
- $rows = $stmt->executeQuery()->fetchAllAssociative();
- $out = [];
- foreach ($rows as $row) {
- $out[] = (string) $row['ip_bin'];
- }
- return $out;
- }
- /**
- * Clear `enriched_at` on every row. Used only by the `?reenrich=true`
- * flag on refresh-geoip; lets `findPending` re-pick all rows up.
- * Returns the affected row count for the job's `items_processed`.
- */
- public function clearAllEnrichedAt(): int
- {
- return (int) $this->connection()->executeStatement(
- 'UPDATE ip_enrichment SET enriched_at = NULL'
- );
- }
- /**
- * Distinct country codes seen so far with their populations.
- * Powers the IPs-list country dropdown.
- *
- * @return list<array{code: string, count: int}>
- */
- public function countryCounts(): array
- {
- $rows = $this->connection()->fetchAllAssociative(
- 'SELECT country_code AS code, COUNT(*) AS cnt FROM ip_enrichment '
- . 'WHERE country_code IS NOT NULL GROUP BY country_code ORDER BY country_code'
- );
- $out = [];
- foreach ($rows as $row) {
- $out[] = [
- 'code' => (string) $row['code'],
- 'count' => (int) $row['cnt'],
- ];
- }
- return $out;
- }
- }
|