ImportController.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. <?php
  2. /*
  3. * Copyright 2026 Alessandro Chiapparini <sprint_planer_web@chiapparini.org>
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * See the LICENSE file in the project root for the full license text.
  9. */
  10. declare(strict_types=1);
  11. namespace App\Controllers;
  12. use App\Auth\SessionGuard;
  13. use App\Domain\Import\ImportResult;
  14. use App\Domain\Import\ParsedSheet;
  15. use App\Domain\User;
  16. use App\Http\Request;
  17. use App\Http\Response;
  18. use App\Http\View;
  19. use App\Repositories\SprintRepository;
  20. use App\Repositories\UserRepository;
  21. use App\Services\AuditLogger;
  22. use App\Services\Import\SprintImporter;
  23. use App\Services\Import\XlsxSprintImporter;
  24. use PDO;
  25. use Throwable;
  26. /**
  27. * Phase 20 — Two-step XLSX → Sprint import wizard.
  28. *
  29. * GET /sprints/import upload form
  30. * POST /sprints/import parse, stash in session, redirect to preview
  31. * GET /sprints/import/{token} preview + per-sheet target picker
  32. * POST /sprints/import/{token} commit selected sheets in one TX each,
  33. * redirect to the first sprint with a flash.
  34. *
  35. * The uploaded XLSX is parsed once and the structured result is stored in
  36. * the session under a random token; the file itself is not persisted to disk.
  37. * Tokens expire after 30 minutes.
  38. */
  39. final class ImportController
  40. {
  41. public const SESSION_KEY = 'sp_imports';
  42. private const TTL_SECONDS = 1800;
  43. private const MAX_FILE_BYTES = 5 * 1024 * 1024;
  44. /**
  45. * R01-N14: hard cap on the JSON-encoded preview blob we stash in
  46. * `$_SESSION` between the upload and the commit step. The 5 MB
  47. * upload cap (`MAX_FILE_BYTES`) bounds raw bytes coming in, but
  48. * parsed XLSX expansion is unbounded — a hostile workbook with
  49. * many tabs could blow the session file size. Two megabytes is
  50. * roomy for a real planning workbook (the production
  51. * `Tool_Sprint Planning` one rarely exceeds a few hundred KB
  52. * serialised) and small enough that on-disk session IO stays
  53. * snappy.
  54. */
  55. public const MAX_SESSION_PAYLOAD_BYTES = 2 * 1024 * 1024;
  56. public function __construct(
  57. private readonly PDO $pdo,
  58. private readonly UserRepository $users,
  59. private readonly SprintRepository $sprints,
  60. private readonly XlsxSprintImporter $parser,
  61. private readonly SprintImporter $committer,
  62. private readonly View $view,
  63. private readonly AuditLogger $audit,
  64. ) {
  65. }
  66. public function newForm(Request $req): Response
  67. {
  68. $actor = SessionGuard::requireAdmin($this->users);
  69. if ($actor instanceof Response) {
  70. return $actor;
  71. }
  72. return Response::html($this->view->render('sprints/import_upload', [
  73. 'title' => 'Import sprints',
  74. 'currentUser' => $actor,
  75. 'csrfToken' => SessionGuard::csrfToken(),
  76. 'error' => $req->queryString('error'),
  77. ]));
  78. }
  79. public function upload(Request $req): Response
  80. {
  81. $actor = SessionGuard::requireAdmin($this->users);
  82. if ($actor instanceof Response) {
  83. return $actor;
  84. }
  85. if (!SessionGuard::verifyCsrf($req)) {
  86. return Response::text('CSRF token invalid', 403);
  87. }
  88. // Validate the upload up-front; fail-closed.
  89. if (!isset($_FILES['xlsx']) || !is_array($_FILES['xlsx'])) {
  90. return Response::redirect('/sprints/import?error=no_file');
  91. }
  92. $file = $_FILES['xlsx'];
  93. if (($file['error'] ?? UPLOAD_ERR_NO_FILE) !== UPLOAD_ERR_OK) {
  94. $code = self::uploadErrorCode((int) ($file['error'] ?? UPLOAD_ERR_NO_FILE));
  95. return Response::redirect('/sprints/import?error=' . $code);
  96. }
  97. $size = (int) ($file['size'] ?? 0);
  98. if ($size <= 0 || $size > self::MAX_FILE_BYTES) {
  99. return Response::redirect('/sprints/import?error=size');
  100. }
  101. $tmp = (string) ($file['tmp_name'] ?? '');
  102. if ($tmp === '' || !is_uploaded_file($tmp)) {
  103. return Response::redirect('/sprints/import?error=upload_invalid');
  104. }
  105. $orig = (string) ($file['name'] ?? '');
  106. if (!self::looksLikeXlsx($orig, $tmp)) {
  107. return Response::redirect('/sprints/import?error=not_xlsx');
  108. }
  109. try {
  110. $sheets = $this->parser->parse($tmp);
  111. } catch (Throwable) {
  112. return Response::redirect('/sprints/import?error=parse_failed');
  113. }
  114. // R01-N14: enforce the per-token serialised cap. We encode once
  115. // for the size check and stash the array form (cheap to read,
  116. // no decode hop on preview/commit). A parse that explodes past
  117. // the cap is rejected outright; nothing lands in the session.
  118. $sheetsArr = array_map(fn(ParsedSheet $s) => $s->toArray(), $sheets);
  119. $payloadBytes = self::encodedPayloadBytes($sheetsArr);
  120. if ($payloadBytes > self::MAX_SESSION_PAYLOAD_BYTES) {
  121. return Response::redirect('/sprints/import?error=too_large_payload');
  122. }
  123. $token = bin2hex(random_bytes(16));
  124. SessionGuard::start();
  125. if (!isset($_SESSION[self::SESSION_KEY]) || !is_array($_SESSION[self::SESSION_KEY])) {
  126. $_SESSION[self::SESSION_KEY] = [];
  127. }
  128. $_SESSION[self::SESSION_KEY][$token] = [
  129. 'created_at' => time(),
  130. 'sheets' => $sheetsArr,
  131. 'file_name' => basename($orig),
  132. 'payload_bytes' => $payloadBytes,
  133. ];
  134. $this->pruneSessionImports($req, $actor);
  135. return Response::redirect('/sprints/import/' . $token);
  136. }
  137. public function preview(Request $req, array $params): Response
  138. {
  139. $actor = SessionGuard::requireAdmin($this->users);
  140. if ($actor instanceof Response) {
  141. return $actor;
  142. }
  143. $token = (string) ($params['token'] ?? '');
  144. $entry = $this->loadSessionEntry($token, $req, $actor);
  145. if ($entry === null) {
  146. return Response::redirect('/sprints/import?error=expired');
  147. }
  148. $sheets = array_map(
  149. static fn(array $arr) => ParsedSheet::fromArray($arr),
  150. $entry['sheets'],
  151. );
  152. // Existing-sprint candidates for the per-sheet target picker: only
  153. // empty sprints (no weeks/workers/tasks) qualify. Spec §plan §2.
  154. $emptySprints = $this->emptySprintCandidates();
  155. // Pre-compute summary stats for the diff panel.
  156. $summaries = [];
  157. $existingWorkerFolds = $this->existingWorkerFolds();
  158. foreach ($sheets as $sheet) {
  159. $summaries[] = $this->summarise($sheet, $existingWorkerFolds);
  160. }
  161. return Response::html($this->view->render('sprints/import_preview', [
  162. 'title' => 'Import preview',
  163. 'currentUser' => $actor,
  164. 'csrfToken' => SessionGuard::csrfToken(),
  165. 'token' => $token,
  166. 'fileName' => (string) ($entry['file_name'] ?? ''),
  167. 'sheets' => $sheets,
  168. 'summaries' => $summaries,
  169. 'emptySprints' => $emptySprints,
  170. 'error' => $req->queryString('error'),
  171. ]));
  172. }
  173. public function commit(Request $req, array $params): Response
  174. {
  175. $actor = SessionGuard::requireAdmin($this->users);
  176. if ($actor instanceof Response) {
  177. return $actor;
  178. }
  179. if (!SessionGuard::verifyCsrf($req)) {
  180. return Response::text('CSRF token invalid', 403);
  181. }
  182. $token = (string) ($params['token'] ?? '');
  183. $entry = $this->loadSessionEntry($token, $req, $actor);
  184. if ($entry === null) {
  185. return Response::redirect('/sprints/import?error=expired');
  186. }
  187. $sheets = array_map(
  188. static fn(array $arr) => ParsedSheet::fromArray($arr),
  189. $entry['sheets'],
  190. );
  191. $results = [];
  192. foreach ($sheets as $idx => $sheet) {
  193. $skip = $req->postString("skip_{$idx}");
  194. if ($skip === '1') {
  195. continue;
  196. }
  197. $name = $req->postString("name_{$idx}");
  198. $startDate = $req->postString("start_{$idx}");
  199. $endDate = $req->postString("end_{$idx}");
  200. $target = $req->postString("target_{$idx}");
  201. $existing = $req->postString("existing_{$idx}");
  202. $existingId = ($target === 'existing' && $existing !== '' && ctype_digit($existing))
  203. ? (int) $existing
  204. : null;
  205. if ($name === '') {
  206. $name = $sheet->sheetName;
  207. }
  208. if ($startDate === '') {
  209. $startDate = $sheet->inferredStartDate ?? '';
  210. }
  211. if ($endDate === '') {
  212. $endDate = $sheet->inferredEndDate ?? '';
  213. }
  214. try {
  215. $results[] = $this->committer->commit(
  216. sheet: $sheet,
  217. sprintName: $name,
  218. startDate: $startDate,
  219. endDate: $endDate,
  220. target: $target === 'existing' ? 'existing' : 'new',
  221. existingSprintId: $existingId,
  222. req: $req,
  223. actor: $actor,
  224. );
  225. } catch (Throwable $e) {
  226. // Bail on first failure: prior sheets in this loop already
  227. // committed, but each is a self-contained transaction so the
  228. // user keeps the partial progress and can re-try the rest.
  229. SessionGuard::start();
  230. $_SESSION['flash_import_error'] = sprintf(
  231. 'Sheet "%s" failed: %s',
  232. $sheet->sheetName,
  233. $e->getMessage(),
  234. );
  235. return Response::redirect('/sprints/import/' . $token . '?error=commit');
  236. }
  237. }
  238. // Drop the session entry; the wizard is done.
  239. if (isset($_SESSION[self::SESSION_KEY][$token])) {
  240. unset($_SESSION[self::SESSION_KEY][$token]);
  241. }
  242. if ($results === []) {
  243. return Response::redirect('/sprints/import?error=nothing_selected');
  244. }
  245. // Land on the first imported sprint with an audible flash.
  246. SessionGuard::start();
  247. $_SESSION['flash_import_summary'] = self::summariseResults($results);
  248. return Response::redirect('/sprints/' . $results[0]->sprintId);
  249. }
  250. // ------------------------------------------------------------------ utils
  251. /** @return array{sheets: list<array<string,mixed>>, file_name: string, created_at: int}|null */
  252. private function loadSessionEntry(string $token, Request $req, User $actor): ?array
  253. {
  254. if (!preg_match('/^[0-9a-f]{32}$/', $token)) {
  255. return null;
  256. }
  257. SessionGuard::start();
  258. $bag = $_SESSION[self::SESSION_KEY] ?? [];
  259. if (!is_array($bag) || !isset($bag[$token]) || !is_array($bag[$token])) {
  260. return null;
  261. }
  262. $entry = $bag[$token];
  263. $createdAt = (int) ($entry['created_at'] ?? 0);
  264. if ($createdAt + self::TTL_SECONDS < time()) {
  265. // R01-N14: emit IMPORT_PREVIEW_ABANDONED for the token the
  266. // user just tried to use. They'll be redirected to the
  267. // upload form with `?error=expired`; the audit row makes the
  268. // expiry visible in `/audit` instead of disappearing
  269. // silently.
  270. $this->recordAbandonedImport($entry, $req, $actor);
  271. unset($_SESSION[self::SESSION_KEY][$token]);
  272. return null;
  273. }
  274. return $entry;
  275. }
  276. private function pruneSessionImports(Request $req, User $actor): void
  277. {
  278. SessionGuard::start();
  279. $bag = $_SESSION[self::SESSION_KEY] ?? [];
  280. if (!is_array($bag)) {
  281. return;
  282. }
  283. $cutoff = time() - self::TTL_SECONDS;
  284. foreach ($bag as $tok => $row) {
  285. if (!is_array($row)) {
  286. unset($_SESSION[self::SESSION_KEY][$tok]);
  287. continue;
  288. }
  289. if ((int) ($row['created_at'] ?? 0) < $cutoff) {
  290. // R01-N14: token aged out without a commit. Same audit
  291. // row as in `loadSessionEntry`.
  292. $this->recordAbandonedImport($row, $req, $actor);
  293. unset($_SESSION[self::SESSION_KEY][$tok]);
  294. }
  295. }
  296. }
  297. /**
  298. * R01-N14: write an IMPORT_PREVIEW_ABANDONED audit row for a
  299. * preview token that expired before being committed. `entity_type`
  300. * is `import_token` because no DB row backs the preview blob; the
  301. * row carries enough metadata (file name, age, sheet count,
  302. * payload size) for an admin reviewing `/audit` to reconstruct
  303. * what was abandoned.
  304. *
  305. * @param array<string,mixed> $entry
  306. */
  307. private function recordAbandonedImport(array $entry, Request $req, User $actor): void
  308. {
  309. $this->audit->recordForRequest(
  310. action: 'IMPORT_PREVIEW_ABANDONED',
  311. entityType: 'import_token',
  312. entityId: null,
  313. before: null,
  314. after: self::abandonedAuditPayload($entry, time()),
  315. req: $req,
  316. actor: $actor,
  317. );
  318. }
  319. /** @return list<array{id:int,name:string,startDate:string,endDate:string}> */
  320. private function emptySprintCandidates(): array
  321. {
  322. $stmt = $this->pdo->query(
  323. 'SELECT s.id, s.name, s.start_date, s.end_date
  324. FROM sprints s
  325. WHERE NOT EXISTS (SELECT 1 FROM sprint_weeks WHERE sprint_id = s.id)
  326. AND NOT EXISTS (SELECT 1 FROM sprint_workers WHERE sprint_id = s.id)
  327. AND NOT EXISTS (SELECT 1 FROM tasks WHERE sprint_id = s.id)
  328. ORDER BY s.start_date DESC, s.id DESC'
  329. );
  330. $out = [];
  331. foreach ($stmt as $row) {
  332. $out[] = [
  333. 'id' => (int) $row['id'],
  334. 'name' => (string) $row['name'],
  335. 'startDate' => (string) $row['start_date'],
  336. 'endDate' => (string) $row['end_date'],
  337. ];
  338. }
  339. return $out;
  340. }
  341. /** @return array<string,bool> case-folded name => true */
  342. private function existingWorkerFolds(): array
  343. {
  344. $stmt = $this->pdo->query('SELECT name FROM workers');
  345. $out = [];
  346. foreach ($stmt as $row) {
  347. $out[SprintImporter::fold((string) $row['name'])] = true;
  348. }
  349. return $out;
  350. }
  351. /**
  352. * @param array<string,bool> $existingFolds
  353. * @return array<string,mixed>
  354. */
  355. private function summarise(ParsedSheet $sheet, array $existingFolds): array
  356. {
  357. $newWorkers = [];
  358. foreach ($sheet->workers as $w) {
  359. if (!isset($existingFolds[SprintImporter::fold($w->name)])) {
  360. $newWorkers[] = $w->name;
  361. }
  362. }
  363. $cells = 0;
  364. $byStatus = [
  365. 'zugewiesen' => 0,
  366. 'gestartet' => 0,
  367. 'abgeschlossen' => 0,
  368. 'abgebrochen' => 0,
  369. ];
  370. foreach ($sheet->tasks as $t) {
  371. foreach ($t->assignments as $a) {
  372. if ($a->days > 0) {
  373. $cells++;
  374. }
  375. $byStatus[$a->status] = ($byStatus[$a->status] ?? 0) + 1;
  376. }
  377. }
  378. return [
  379. 'newWorkers' => $newWorkers,
  380. 'workerCount' => count($sheet->workers),
  381. 'taskCount' => count($sheet->tasks),
  382. 'weekCount' => count($sheet->weeks),
  383. 'cellCount' => $cells,
  384. 'statusCounts' => $byStatus,
  385. ];
  386. }
  387. /** @param list<ImportResult> $results */
  388. private static function summariseResults(array $results): string
  389. {
  390. $parts = [];
  391. foreach ($results as $r) {
  392. $bits = ["{$r->workerCount} workers", "{$r->taskCount} tasks", "{$r->assignmentCellCount} cells"];
  393. if ($r->createdWorkers !== []) {
  394. $bits[] = count($r->createdWorkers) . ' new workers';
  395. }
  396. $parts[] = "“{$r->sprintName}” imported (" . implode(', ', $bits) . ').';
  397. }
  398. return implode(' ', $parts);
  399. }
  400. /**
  401. * R01-N14: serialised-byte estimate for a `ParsedSheet[]::toArray()`
  402. * payload. Pure, so the cap can be unit-tested without a real
  403. * upload + session round-trip.
  404. *
  405. * Encoding mirrors `AuditLogger::encodeJson`: UTF-8 plain, no
  406. * escaping of slashes — what the session bag *would* serialise to
  407. * if we were JSON-encoding it. PHP's session serialiser is
  408. * different (PHP-serialized format) but the JSON byte length is a
  409. * reliable proxy and matches the contract recorded in REVIEW_01.
  410. *
  411. * @param array<int,array<string,mixed>> $sheetsArr
  412. */
  413. public static function encodedPayloadBytes(array $sheetsArr): int
  414. {
  415. $json = json_encode(
  416. $sheetsArr,
  417. JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES,
  418. );
  419. return $json === false ? 0 : strlen($json);
  420. }
  421. /**
  422. * R01-N14: payload for the IMPORT_PREVIEW_ABANDONED audit row.
  423. * Keeps just enough metadata to reconstruct what was lost without
  424. * persisting any user task content.
  425. *
  426. * @param array<string,mixed> $entry the session entry as stashed
  427. * by `upload()`
  428. * @return array<string,mixed>
  429. */
  430. public static function abandonedAuditPayload(array $entry, int $now): array
  431. {
  432. $createdAt = (int) ($entry['created_at'] ?? 0);
  433. $sheets = (array) ($entry['sheets'] ?? []);
  434. // Missing `created_at` reads as 0; without this guard the
  435. // computed age would be the full unix-epoch offset (~50 yr),
  436. // which is alarming noise in `/audit` for what is just a
  437. // malformed entry. Same shape clamps clock-skew "future" rows
  438. // to 0 instead of negative.
  439. $age = $createdAt > 0 ? max(0, $now - $createdAt) : 0;
  440. return [
  441. 'file_name' => (string) ($entry['file_name'] ?? ''),
  442. 'sheet_count' => count($sheets),
  443. 'payload_bytes' => (int) ($entry['payload_bytes'] ?? 0),
  444. 'age_seconds' => $age,
  445. 'created_at' => $createdAt > 0 ? gmdate('Y-m-d\TH:i:s\Z', $createdAt) : '',
  446. ];
  447. }
  448. private static function looksLikeXlsx(string $origName, string $tmpPath): bool
  449. {
  450. if (!preg_match('/\.xlsx$/i', $origName)) {
  451. return false;
  452. }
  453. // XLSX is a ZIP container; the file's first 4 bytes start with PK\x03\x04 or PK\x05\x06.
  454. $fh = @fopen($tmpPath, 'rb');
  455. if ($fh === false) {
  456. return false;
  457. }
  458. $head = (string) fread($fh, 4);
  459. fclose($fh);
  460. if (strlen($head) !== 4) {
  461. return false;
  462. }
  463. return $head[0] === 'P' && $head[1] === 'K'
  464. && (($head[2] === "\x03" && $head[3] === "\x04")
  465. || ($head[2] === "\x05" && $head[3] === "\x06"));
  466. }
  467. private static function uploadErrorCode(int $code): string
  468. {
  469. return match ($code) {
  470. UPLOAD_ERR_INI_SIZE,
  471. UPLOAD_ERR_FORM_SIZE => 'too_big',
  472. UPLOAD_ERR_PARTIAL => 'partial',
  473. UPLOAD_ERR_NO_FILE => 'no_file',
  474. UPLOAD_ERR_NO_TMP_DIR,
  475. UPLOAD_ERR_CANT_WRITE,
  476. UPLOAD_ERR_EXTENSION => 'server',
  477. default => 'unknown',
  478. };
  479. }
  480. }