Explorar el Código

fix: batch-load admin IPs list per-row lookups (SEC_REVIEW F32)

`IpsController::list` previously issued three DB queries per page row
(enrichment, top category, hasAnyScore). At page_size=200 that's 600+
round-trips on top of the search + count. Replace the per-row loop
with two batch lookups — `IpEnrichmentRepository::findByIpBins()` and
`IpScoreRepository::topCategoryByIpBins()` — and derive the row's
effective status from the `max_score` column already in the search
result, falling back to the in-memory CIDR evaluator that's loaded
once per request. Total DB cost is now invariant in page_size: search
+ count + 2 batch lookups = 4 round-trips.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
chiappa hace 3 días
padre
commit
05943057b8

+ 37 - 17
api/src/Application/Admin/IpsController.php

@@ -7,6 +7,7 @@ namespace App\Application\Admin;
 use App\Domain\Category\Category;
 use App\Domain\Ip\InvalidIpException;
 use App\Domain\Ip\IpAddress;
+use App\Domain\Reputation\CidrEvaluator;
 use App\Domain\Reputation\EffectiveStatus;
 use App\Domain\Reputation\EffectiveStatusService;
 use App\Infrastructure\Allowlist\AllowlistRepository;
@@ -78,12 +79,25 @@ final class IpsController
         $result = $this->ipScores->searchIps($repoFilters, $pageSize, ($page - 1) * $pageSize);
         $slugByCategoryId = $this->slugByCategoryId();
 
-        // Per-row: top category slug + raw enrichment (country flag rendering happens in the UI).
+        // SEC_REVIEW F32: batch the per-row enrichment / top-category /
+        // status lookups so the request stays at O(1) DB round-trips
+        // regardless of page_size, instead of O(page_size). The CIDR
+        // evaluator already gives O(1) in-memory hash lookups for the
+        // allow / manual-block sets, so the only DB hits we need are:
+        //   - one IN-list SELECT against `ip_enrichment`
+        //   - one IN-list SELECT against `ip_scores` for top category
+        // `max_score > 0` (already in the search result row) tells us
+        // whether the IP would resolve to `Scored`, replacing the
+        // per-row `hasAnyScore` query.
+        $ipBins = array_map(static fn (array $row): string => $row['ip_bin'], $result['items']);
+        $enrichmentByBin = $this->enrichment->findByIpBins($ipBins);
+        $topCategoryByBin = $this->ipScores->topCategoryByIpBins($ipBins);
+
         $items = [];
         foreach ($result['items'] as $row) {
-            $top = $this->topCategoryFor($row['ip_bin']);
-            $enrichmentRow = $this->enrichment->findByIpBin($row['ip_bin']);
-            $effective = $this->effectiveStatusFor($row['ip_bin']);
+            $bin = $row['ip_bin'];
+            $top = $topCategoryByBin[$bin] ?? null;
+            $effective = $this->effectiveStatusFromRow($bin, (float) $row['max_score'], $evaluator);
             $items[] = [
                 'ip' => $row['ip_text'],
                 'is_ipv4' => str_contains($row['ip_text'], '.') && !str_contains($row['ip_text'], ':'),
@@ -92,7 +106,7 @@ final class IpsController
                 'pair_count' => $row['pair_count'],
                 'last_report_at' => $this->formatTimestamp($row['last_report_at']),
                 'status' => $effective->value,
-                'enrichment' => $enrichmentRow,
+                'enrichment' => $enrichmentByBin[$bin] ?? null,
             ];
         }
 
@@ -269,21 +283,27 @@ final class IpsController
         return null;
     }
 
-    private function topCategoryFor(string $ipBin): ?int
+    /**
+     * Resolve the row's effective status without an extra DB hit. The
+     * search result already contains `max_score`, so we can short-circuit
+     * the `hasAnyScore` query that `EffectiveStatusService` would issue
+     * per row. Allow / manual-block checks stay O(1) via the in-memory
+     * evaluator that's already loaded once for the request.
+     */
+    private function effectiveStatusFromRow(string $ipBin, float $maxScore, CidrEvaluator $evaluator): EffectiveStatus
     {
-        $rows = $this->ipScores->scoresForIp($ipBin);
-        foreach ($rows as $row) {
-            if ($row['score'] > 0) {
-                return $row['category_id'];
-            }
+        $ip = IpAddress::fromBinary($ipBin);
+        if ($evaluator->isAllowlisted($ip)) {
+            return EffectiveStatus::Allowlisted;
+        }
+        if ($evaluator->isManuallyBlocked($ip)) {
+            return EffectiveStatus::ManuallyBlocked;
+        }
+        if ($maxScore > 0) {
+            return EffectiveStatus::Scored;
         }
 
-        return null;
-    }
-
-    private function effectiveStatusFor(string $ipBin): EffectiveStatus
-    {
-        return $this->effectiveStatus->forIp(IpAddress::fromBinary($ipBin));
+        return EffectiveStatus::Clean;
     }
 
     /**

+ 47 - 0
api/src/Infrastructure/Reputation/IpEnrichmentRepository.php

@@ -34,6 +34,53 @@ class IpEnrichmentRepository extends RepositoryBase
         ];
     }
 
+    /**
+     * Batched form of `findByIpBin`. Single SELECT with `IN (…)` over the
+     * bin set, returning a map keyed by raw `ip_bin` so callers can
+     * dereference per-row without an extra round-trip per IP.
+     *
+     * SEC_REVIEW F32: the admin IPs list previously called
+     * `findByIpBin` per page row; at `page_size=200` that's 200
+     * round-trips just for enrichment. Bind binary IN-list params with
+     * `LARGE_OBJECT` so MySQL treats them as octets, matching `upsert`.
+     *
+     * @param list<string> $ipBins
+     * @return array<string, array{country_code: ?string, asn: ?int, as_org: ?string, enriched_at: ?string}>
+     */
+    public function findByIpBins(array $ipBins): array
+    {
+        if ($ipBins === []) {
+            return [];
+        }
+
+        $names = [];
+        $params = [];
+        $types = [];
+        foreach ($ipBins as $i => $bin) {
+            $name = 'b' . $i;
+            $names[] = ':' . $name;
+            $params[$name] = $bin;
+            $types[$name] = ParameterType::LARGE_OBJECT;
+        }
+        $sql = 'SELECT ip_bin, country_code, asn, as_org, enriched_at FROM ip_enrichment '
+            . 'WHERE ip_bin IN (' . implode(', ', $names) . ')';
+
+        /** @var list<array<string, mixed>> $rows */
+        $rows = $this->connection()->fetchAllAssociative($sql, $params, $types);
+
+        $out = [];
+        foreach ($rows as $row) {
+            $out[(string) $row['ip_bin']] = [
+                'country_code' => $row['country_code'] !== null ? (string) $row['country_code'] : null,
+                'asn' => $row['asn'] !== null ? (int) $row['asn'] : null,
+                'as_org' => $row['as_org'] !== null ? (string) $row['as_org'] : null,
+                'enriched_at' => $row['enriched_at'] !== null ? (string) $row['enriched_at'] : null,
+            ];
+        }
+
+        return $out;
+    }
+
     /**
      * Insert or update one enrichment row. Driver-aware: SQLite uses
      * `ON CONFLICT(ip_bin) DO UPDATE`, MySQL uses `ON DUPLICATE KEY`.

+ 49 - 0
api/src/Infrastructure/Reputation/IpScoreRepository.php

@@ -305,6 +305,55 @@ class IpScoreRepository extends RepositoryBase
         return ['items' => $items, 'total' => $total];
     }
 
+    /**
+     * Batched "top scoring category" lookup keyed by ip_bin. For each input
+     * bin, returns the `category_id` with the highest `score > 0` row, or
+     * absent if no such row exists.
+     *
+     * SEC_REVIEW F32: the admin IPs list previously called
+     * `scoresForIp` per page row; at `page_size=200` that's 200
+     * round-trips just to pick the top category slug. One query keyed on
+     * the bin set replaces all of them.
+     *
+     * @param list<string> $ipBins
+     * @return array<string, int> map of ip_bin → top category_id (only IPs
+     *     with at least one score > 0 row are present)
+     */
+    public function topCategoryByIpBins(array $ipBins): array
+    {
+        if ($ipBins === []) {
+            return [];
+        }
+
+        $names = [];
+        $params = [];
+        $types = [];
+        foreach ($ipBins as $i => $bin) {
+            $name = 'b' . $i;
+            $names[] = ':' . $name;
+            $params[$name] = $bin;
+            $types[$name] = ParameterType::LARGE_OBJECT;
+        }
+        // Order by ip_bin then score DESC; the first row encountered per
+        // bin in PHP is its top category.
+        $sql = 'SELECT ip_bin, category_id, score FROM ip_scores '
+            . 'WHERE score > 0 AND ip_bin IN (' . implode(', ', $names) . ') '
+            . 'ORDER BY ip_bin ASC, score DESC, category_id ASC';
+
+        /** @var list<array<string, mixed>> $rows */
+        $rows = $this->connection()->fetchAllAssociative($sql, $params, $types);
+
+        $out = [];
+        foreach ($rows as $row) {
+            $bin = (string) $row['ip_bin'];
+            if (!isset($out[$bin])) {
+                $out[$bin] = (int) $row['category_id'];
+            }
+        }
+
+        return $out;
+    }
+
     /**
      * Histogram of IPs by their max score across all categories, bucketed in
      * `$bucketSize`-wide bins (e.g. 5 → 0..5, 5..10, …). Only IPs with score

+ 78 - 0
api/tests/Integration/Admin/IpsControllerTest.php

@@ -170,6 +170,69 @@ final class IpsControllerTest extends AppTestCase
         self::assertSame('203.0.113.10', $body['items'][0]['ip']);
     }
 
+    /**
+     * SEC_REVIEW F32: the list endpoint now batches enrichment, top
+     * category, and effective-status lookups so the per-row data surfaces
+     * correctly without an extra DB round-trip per IP. Seed enough rows
+     * that the previous N+1 loop would have made hundreds of round-trips.
+     */
+    public function testSearchBatchesPerRowLookups(): void
+    {
+        $token = $this->createToken(TokenKind::Admin, role: Role::Viewer);
+
+        // 25 scored IPs, of which two also carry enrichment rows. The
+        // assertion checks that the batch lookup correctly attaches
+        // enrichment to exactly those IPs and nulls for the rest.
+        for ($i = 0; $i < 25; ++$i) {
+            $this->seedScored(sprintf('203.0.113.%d', 10 + $i), 'brute_force', 1.0 + ($i / 100));
+        }
+        $this->seedEnrichment('203.0.113.12', 'AA', 64500, 'AS-AA');
+        $this->seedEnrichment('203.0.113.20', 'BB', 64501, 'AS-BB');
+
+        $body = $this->decode($this->request('GET', '/api/v1/admin/ips?page=1&page_size=200', [
+            'Authorization' => 'Bearer ' . $token,
+        ]));
+        self::assertSame(25, $body['total']);
+
+        $byIp = [];
+        foreach ($body['items'] as $row) {
+            $byIp[$row['ip']] = $row;
+        }
+
+        self::assertSame('AA', $byIp['203.0.113.12']['enrichment']['country_code']);
+        self::assertSame(64500, $byIp['203.0.113.12']['enrichment']['asn']);
+        self::assertSame('BB', $byIp['203.0.113.20']['enrichment']['country_code']);
+        self::assertNull($byIp['203.0.113.10']['enrichment']);
+
+        // top_category resolves via the batched score lookup.
+        self::assertSame('brute_force', $byIp['203.0.113.10']['top_category']);
+        self::assertSame('brute_force', $byIp['203.0.113.20']['top_category']);
+
+        // Status falls out of `max_score > 0` without a per-row hasAnyScore.
+        foreach ($body['items'] as $row) {
+            self::assertSame('scored', $row['status']);
+        }
+    }
+
+    /**
+     * SEC_REVIEW F32: when an IP appears in the search results but its
+     * `max_score` is 0 (e.g. score row exists but has decayed), the row
+     * resolves to `clean` without a per-row `hasAnyScore` query.
+     */
+    public function testSearchStatusUsesMaxScoreColumn(): void
+    {
+        $token = $this->createToken(TokenKind::Admin, role: Role::Viewer);
+        $this->seedScored('203.0.113.10', 'brute_force', 0.0);
+
+        $body = $this->decode($this->request('GET', '/api/v1/admin/ips', [
+            'Authorization' => 'Bearer ' . $token,
+        ]));
+        self::assertSame(1, $body['total']);
+        self::assertSame('203.0.113.10', $body['items'][0]['ip']);
+        self::assertSame('clean', $body['items'][0]['status']);
+        self::assertNull($body['items'][0]['top_category']);
+    }
+
     public function testDetailReturnsScoresAndStatus(): void
     {
         $token = $this->createToken(TokenKind::Admin, role: Role::Viewer);
@@ -239,6 +302,21 @@ final class IpsControllerTest extends AppTestCase
         $stmt->executeStatement();
     }
 
+    private function seedEnrichment(string $ip, string $countryCode, int $asn, string $asOrg): void
+    {
+        $ipObj = IpAddress::fromString($ip);
+        $stmt = $this->db->prepare(
+            'INSERT INTO ip_enrichment (ip_bin, country_code, asn, as_org, enriched_at) '
+            . 'VALUES (:b, :c, :a, :o, :now)'
+        );
+        $stmt->bindValue('b', $ipObj->binary(), ParameterType::LARGE_OBJECT);
+        $stmt->bindValue('c', $countryCode);
+        $stmt->bindValue('a', $asn, ParameterType::INTEGER);
+        $stmt->bindValue('o', $asOrg);
+        $stmt->bindValue('now', (new \DateTimeImmutable('now'))->format('Y-m-d H:i:s'));
+        $stmt->executeStatement();
+    }
+
     private function seedReport(string $ip, string $categorySlug, int $reporterId): void
     {
         $catId = (int) $this->db->fetchOne('SELECT id FROM categories WHERE slug = :s', ['s' => $categorySlug]);