|
@@ -23,6 +23,23 @@ final class DbipDownloader implements GeoIpDownloader
|
|
|
{
|
|
{
|
|
|
private const BASE_URL = 'https://download.db-ip.com/free';
|
|
private const BASE_URL = 'https://download.db-ip.com/free';
|
|
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
|
+ * SEC_REVIEW F49: cap on the total bytes we'll decompress out of a
|
|
|
|
|
+ * DB-IP gzip. Real `dbip-country-lite-*.mmdb` is ~10 MiB and
|
|
|
|
|
+ * `dbip-asn-lite-*.mmdb` is ~5 MiB; 400 MiB is generous against
|
|
|
|
|
+ * future growth while bounding the worst case at "no single
|
|
|
|
|
+ * gunzip can fill /data". Mirrors the MaxMind tarball total cap
|
|
|
|
|
+ * so both downloaders agree on what "too big" looks like.
|
|
|
|
|
+ */
|
|
|
|
|
+ private const MAX_DECOMPRESSED_BYTES = 400 * 1024 * 1024;
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Stream chunk size for `gzread`. Decoupled from the cap so the
|
|
|
|
|
+ * peak memory footprint of a download is the chunk, not the
|
|
|
|
|
+ * decompressed file.
|
|
|
|
|
+ */
|
|
|
|
|
+ private const GUNZIP_CHUNK_BYTES = 64 * 1024;
|
|
|
|
|
+
|
|
|
public function __construct(
|
|
public function __construct(
|
|
|
private readonly ClientInterface $http,
|
|
private readonly ClientInterface $http,
|
|
|
private readonly Clock $clock,
|
|
private readonly Clock $clock,
|
|
@@ -105,23 +122,102 @@ final class DbipDownloader implements GeoIpDownloader
|
|
|
);
|
|
);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Test-facing entry point for the capped, streaming gunzip
 * (SEC_REVIEW F49).
 *
 * Delegates straight to {@see gunzipImpl()} with a caller-chosen
 * `$maxBytes`, so a unit test can exercise the cap with a small
 * fixture. The production path, {@see gunzip()}, calls the same
 * routine with {@see MAX_DECOMPRESSED_BYTES}.
 *
 * Decompression reads {@see GUNZIP_CHUNK_BYTES} at a time, so peak
 * memory is one chunk rather than the whole file. The call aborts
 * with a DownloaderException once the decompressed output would
 * exceed `$maxBytes`, and the partial `$outPath` is removed on
 * failure. On success `$gzPath` is deleted.
 *
 * @internal Public only so tests can reach it; not part of the API.
 *
 * @throws DownloaderException when the archive cannot be opened or
 *                             read, the output cannot be written,
 *                             the cap is exceeded, or the output
 *                             is empty
 */
public static function gunzipWithCap(string $gzPath, string $outPath, int $maxBytes): void
{
    self::gunzipImpl(
        gzPath: $gzPath,
        outPath: $outPath,
        maxBytes: $maxBytes,
    );
}
|
|
|
|
|
+
|
|
|
/**
 * Production gunzip: decompress `$gzPath` into `$outPath` under the
 * default {@see MAX_DECOMPRESSED_BYTES} cap.
 *
 * @throws DownloaderException propagated from {@see gunzipImpl()}
 */
private function gunzip(string $gzPath, string $outPath): void
{
    self::gunzipImpl(
        $gzPath,
        $outPath,
        maxBytes: self::MAX_DECOMPRESSED_BYTES,
    );
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Stream-decompress the gzip at `$gzPath` into `$outPath`, refusing
 * to emit more than `$maxBytes` of decompressed data.
 *
 * Data is read {@see GUNZIP_CHUNK_BYTES} at a time, so peak memory is
 * one chunk. Every failure goes through a single path that closes
 * both handles and unlinks `$outPath`, so a half-decoded file is
 * never left behind. On success the source `$gzPath` is deleted.
 *
 * @param string $gzPath   path of the downloaded gzip archive
 * @param string $outPath  destination for the decompressed bytes
 * @param int    $maxBytes largest decompressed size that is accepted
 *
 * @throws DownloaderException when the archive cannot be opened or
 *                             read, the output cannot be opened or
 *                             fully written, the cap is exceeded,
 *                             or the archive decompresses to nothing
 */
private static function gunzipImpl(string $gzPath, string $outPath, int $maxBytes): void
{
    $in = @gzopen($gzPath, 'rb');
    if ($in === false) {
        throw new DownloaderException(sprintf('cannot gzopen %s', $gzPath));
    }

    $out = @fopen($outPath, 'wb');
    if ($out === false) {
        gzclose($in);

        throw new DownloaderException(sprintf('cannot fopen %s for write', $outPath));
    }

    $written = 0;
    $failure = null;

    try {
        while (!gzeof($in)) {
            $chunk = @gzread($in, self::GUNZIP_CHUNK_BYTES);
            if ($chunk === false) {
                throw new DownloaderException(sprintf('gzread failed on %s', $gzPath));
            }
            if ($chunk === '') {
                // An empty read is only legitimate at end-of-stream.
                // Anything else would make this loop spin forever, so
                // treat it as a failed read instead of retrying.
                if (gzeof($in)) {
                    break;
                }

                throw new DownloaderException(sprintf('gzread made no progress on %s', $gzPath));
            }

            $chunkLength = strlen($chunk);
            $written += $chunkLength;
            // Check the cap before writing so the file on disk never
            // grows past `$maxBytes`.
            if ($written > $maxBytes) {
                throw new DownloaderException(sprintf(
                    'dbip gunzip output exceeds cap %d bytes',
                    $maxBytes,
                ));
            }

            // fwrite() reports a short write (e.g. disk full part-way
            // through the chunk) as a byte count, not `false`; both
            // mean the output is incomplete.
            if (@fwrite($out, $chunk) !== $chunkLength) {
                throw new DownloaderException(sprintf('write to %s failed', $outPath));
            }
        }

        if ($written === 0) {
            throw new DownloaderException(sprintf('gunzip produced empty output for %s', $gzPath));
        }
    } catch (DownloaderException $e) {
        $failure = $e;
    } catch (Throwable $e) {
        $failure = new DownloaderException(sprintf('gunzip of %s failed', $gzPath), 0, $e);
    }

    @gzclose($in);
    // A failed close can mean the final bytes never reached the disk.
    if (@fclose($out) === false && $failure === null) {
        $failure = new DownloaderException(sprintf('write to %s failed', $outPath));
    }

    if ($failure !== null) {
        // Don't leave a half-decoded file on disk: caller treats
        // existence of `$outPath` as "verified ready to swap".
        @unlink($outPath);

        throw $failure;
    }

    @unlink($gzPath);
}
|
|
|
}
|
|
}
|