Skip to content

Commit 49f5939

Browse files
committed
perf(preview): bulk process preview regeneration
Signed-off-by: Anna Larch <[email protected]>
1 parent 1564846 commit 49f5939

File tree

2 files changed

+510
-69
lines changed

2 files changed

+510
-69
lines changed

lib/private/Preview/Storage/LocalPreviewStorage.php

Lines changed: 226 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use OC\Preview\Db\Preview;
1717
use OC\Preview\Db\PreviewMapper;
1818
use OCP\DB\Exception;
19+
use OCP\DB\QueryBuilder\IQueryBuilder;
1920
use OCP\Files\IMimeTypeDetector;
2021
use OCP\Files\IMimeTypeLoader;
2122
use OCP\Files\IRootFolder;
@@ -30,6 +31,8 @@
3031
use RecursiveIteratorIterator;
3132

3233
class LocalPreviewStorage implements IPreviewStorage {
34+
private const SCAN_BATCH_SIZE = 1000;
35+
3336
public function __construct(
3437
private readonly IConfig $config,
3538
private readonly PreviewMapper $previewMapper,
@@ -117,88 +120,241 @@ public function scan(): int {
117120
if (!file_exists($this->getPreviewRootFolder())) {
118121
return 0;
119122
}
123+
120124
$scanner = new RecursiveDirectoryIterator($this->getPreviewRootFolder());
121125
$previewsFound = 0;
122-
$skipFiles = [];
126+
127+
/**
128+
* Use an associative array keyed by path for O(1) lookup instead of
129+
* the O(n) in_array() the original code used.
130+
*
131+
* @var array<string, true> $skipPaths
132+
*/
133+
$skipPaths = [];
134+
135+
/**
136+
* Pending previews grouped by fileId. A single original file can have
137+
* many preview variants (different sizes/formats), so we group them to
138+
* issue one filecache lookup per original file rather than one per
139+
* preview variant.
140+
*
141+
* @var array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
142+
*/
143+
$pendingByFileId = [];
144+
145+
/**
146+
* path_hash => realPath for legacy filecache entries that need to be
147+
* cleaned up. Only populated when $checkForFileCache is true.
148+
*
149+
* @var array<string, string> $pendingPathHashes
150+
*/
151+
$pendingPathHashes = [];
152+
$pendingCount = 0;
153+
123154
foreach (new RecursiveIteratorIterator($scanner) as $file) {
124-
if ($file->isFile() && !in_array((string)$file, $skipFiles, true)) {
125-
$preview = Preview::fromPath((string)$file, $this->mimeTypeDetector);
126-
if ($preview === false) {
127-
$this->logger->error('Unable to parse preview information for ' . $file->getRealPath());
128-
continue;
129-
}
155+
if (!$file->isFile()) {
156+
continue;
157+
}
158+
159+
$filePath = $file->getPathname();
160+
if (isset($skipPaths[$filePath])) {
161+
continue;
162+
}
163+
164+
$preview = Preview::fromPath($filePath, $this->mimeTypeDetector);
165+
if ($preview === false) {
166+
$this->logger->error('Unable to parse preview information for ' . $file->getRealPath());
167+
continue;
168+
}
169+
170+
$preview->setSize($file->getSize());
171+
$preview->setMtime($file->getMtime());
172+
$preview->setEncrypted(false);
173+
174+
$realPath = $file->getRealPath();
175+
$pendingByFileId[$preview->getFileId()][] = [
176+
'preview' => $preview,
177+
'filePath' => $filePath,
178+
'realPath' => $realPath,
179+
];
180+
$pendingCount++;
181+
182+
if ($checkForFileCache) {
183+
$relativePath = str_replace($this->getRootFolder() . '/', '', $realPath);
184+
$pendingPathHashes[md5($relativePath)] = $realPath;
185+
}
186+
187+
if ($pendingCount >= self::SCAN_BATCH_SIZE) {
188+
$this->connection->beginTransaction();
130189
try {
131-
$preview->setSize($file->getSize());
132-
$preview->setMtime($file->getMtime());
133-
$preview->setEncrypted(false);
134-
135-
$qb = $this->connection->getQueryBuilder();
136-
$result = $qb->select('storage', 'etag', 'mimetype')
137-
->from('filecache')
138-
->where($qb->expr()->eq('fileid', $qb->createNamedParameter($preview->getFileId())))
139-
->setMaxResults(1)
140-
->runAcrossAllShards() // Unavoidable because we can't extract the storage_id from the preview name
141-
->executeQuery()
142-
->fetchAssociative();
143-
144-
if ($result === false) {
145-
// original file is deleted
146-
$this->logger->warning('Original file ' . $preview->getFileId() . ' was not found. Deleting preview at ' . $file->getRealPath());
147-
@unlink($file->getRealPath());
148-
continue;
149-
}
190+
$previewsFound += $this->processScanBatch($pendingByFileId, $pendingPathHashes, $checkForFileCache, $skipPaths);
191+
$this->connection->commit();
192+
} catch (\Exception $e) {
193+
$this->connection->rollBack();
194+
$this->logger->error($e->getMessage(), ['exception' => $e]);
195+
throw $e;
196+
}
197+
$pendingByFileId = [];
198+
$pendingPathHashes = [];
199+
$pendingCount = 0;
200+
}
201+
}
202+
203+
if ($pendingCount > 0) {
204+
$this->connection->beginTransaction();
205+
try {
206+
$previewsFound += $this->processScanBatch($pendingByFileId, $pendingPathHashes, $checkForFileCache, $skipPaths);
207+
$this->connection->commit();
208+
} catch (\Exception $e) {
209+
$this->connection->rollBack();
210+
$this->logger->error($e->getMessage(), ['exception' => $e]);
211+
throw $e;
212+
}
213+
}
214+
215+
return $previewsFound;
216+
}
217+
218+
/**
 * Process one batch of preview files collected during scan().
 *
 * For every pending preview whose original file still has a filecache row
 * the preview entity is completed (storage, etag, source mimetype, id) and
 * inserted through the preview mapper; a unique-constraint violation is
 * tolerated because it means the preview is already registered. When
 * $checkForFileCache is set, the legacy filecache row of the preview file
 * (and its now-empty parent folders) is removed. Files that are not yet
 * stored in the nested directory layout are moved to constructPath().
 * Previews whose original file has no filecache row are deleted from disk.
 *
 * NOTE(review): scan() calls this method inside an open transaction and the
 * insert below opens a nested one. Whether rolling back the nested
 * transaction leaves the outer one committable depends on how the connection
 * handles nesting (savepoints) - confirm. File deletions and moves done here
 * are not undone when the surrounding transaction is rolled back.
 *
 * @param array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId Pending previews grouped by the fileId of their original file.
 * @param array<string, string> $pendingPathHashes path_hash => realPath
 * @param bool $checkForFileCache Whether legacy filecache rows of the preview files must be looked up and deleted.
 * @param array<string, true> $skipPaths Modified in place: newly-moved paths are added so the outer iterator skips them.
 * @return int Number of previews in the batch whose original file still exists.
 * @throws Exception When an insert fails for any reason other than a unique constraint violation.
 * @throws LogicException When a preview file cannot be moved to its nested location.
 */
private function processScanBatch(
	array $pendingByFileId,
	array $pendingPathHashes,
	bool $checkForFileCache,
	array &$skipPaths,
): int {
	$filecacheByFileId = $this->fetchFilecacheByFileIds(array_keys($pendingByFileId));
	$legacyByPathHash = [];
	if ($checkForFileCache && $pendingPathHashes !== []) {
		$legacyByPathHash = $this->fetchFilecacheByPathHashes(array_keys($pendingPathHashes));
	}

	$previewsFound = 0;
	foreach ($pendingByFileId as $fileId => $items) {
		if (!isset($filecacheByFileId[$fileId])) {
			// Original file has been deleted - clean up all its previews.
			foreach ($items as $item) {
				$this->logger->warning('Original file ' . $fileId . ' was not found. Deleting preview at ' . $item['realPath']);
				// Deletion stays best-effort, but a failure is reported instead of being swallowed.
				if (!@unlink($item['realPath'])) {
					$this->logger->warning('Failed to delete preview at ' . $item['realPath']);
				}
			}
			continue;
		}

		$filecacheRow = $filecacheByFileId[$fileId];
		foreach ($items as $item) {
			$preview = $item['preview'];

			if ($checkForFileCache) {
				// Same hash derivation scan() used when it filled $pendingPathHashes.
				$relativePath = str_replace($this->getRootFolder() . '/', '', $item['realPath']);
				$pathHash = md5($relativePath);
				if (isset($legacyByPathHash[$pathHash])) {
					$legacyRow = $legacyByPathHash[$pathHash];
					$qb = $this->connection->getQueryBuilder();
					$qb->delete('filecache')
						->where($qb->expr()->eq('fileid', $qb->createNamedParameter($legacyRow['fileid'])))
						->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($legacyRow['storage'])))
						->executeStatement();
					$this->deleteParentsFromFileCache((int)$legacyRow['parent'], (int)$legacyRow['storage']);
				}
			}

			$preview->setStorageId((int)$filecacheRow['storage']);
			$preview->setEtag($filecacheRow['etag']);
			$preview->setSourceMimetype($this->mimeTypeLoader->getMimetypeById((int)$filecacheRow['mimetype']));
			$preview->generateId();

			// Try to insert; a unique constraint violation means the preview is already in the DB.
			$this->connection->beginTransaction();
			try {
				$this->previewMapper->insert($preview);
				$this->connection->commit();
			} catch (Exception $e) {
				$this->connection->rollBack();
				if ($e->getReason() !== Exception::REASON_UNIQUE_CONSTRAINT_VIOLATION) {
					throw $e;
				}
			}

			// Move old flat preview to new nested directory format.
			// The nested layout is seven single-character directory levels followed by the
			// numeric file id. The levels are matched as hexadecimal digits (0-9, a-f); the
			// previous class [0-9a-e] missed 'f', so such paths were treated as legacy and
			// moved again on every scan.
			// NOTE(review): assumes constructPath() shards by hex characters - confirm.
			$dirName = str_replace($this->getPreviewRootFolder(), '', $item['filePath']);
			if (preg_match('/(?:[0-9a-f]\/){7}[0-9]+/', $dirName) !== 1) {
				$previewPath = $this->constructPath($preview);
				$this->createParentFiles($previewPath);
				$ok = rename($item['realPath'], $previewPath);
				if (!$ok) {
					throw new LogicException('Failed to move ' . $item['realPath'] . ' to ' . $previewPath);
				}
				// Mark the destination so the outer iterator skips it if it encounters the path later.
				$skipPaths[$previewPath] = true;
			}

			$previewsFound++;
		}
	}

	return $previewsFound;
}
201301

302+
/**
 * Bulk-fetch filecache rows for a set of fileIds.
 *
 * The ids are looked up in chunks of at most 1000 and each chunk is sent as
 * its own query. Adding every chunk's IN (...) clause to a single query with
 * andWhere() would AND disjoint id sets together and match no row at all as
 * soon as there is more than one chunk.
 *
 * @param int[] $fileIds
 * @return array<int, array<string, mixed>> Rows (fileid, storage, etag, mimetype) keyed by fileid; ids without a filecache row are absent.
 */
private function fetchFilecacheByFileIds(array $fileIds): array {
	if ($fileIds === []) {
		return [];
	}

	$result = [];
	foreach (array_chunk($fileIds, 1000) as $chunk) {
		// A fresh builder per chunk keeps the chunks independent of each other.
		$qb = $this->connection->getQueryBuilder();
		$rows = $qb->select('fileid', 'storage', 'etag', 'mimetype')
			->from('filecache')
			->where($qb->expr()->in('fileid', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_INT_ARRAY)))
			->runAcrossAllShards()
			->executeQuery();
		try {
			while (($row = $rows->fetchAssociative()) !== false) {
				$result[(int)$row['fileid']] = $row;
			}
		} finally {
			// Release the cursor even if fetching throws.
			$rows->closeCursor();
		}
	}

	return $result;
}
329+
330+
/**
 * Bulk-fetch filecache rows for a set of path_hashes (legacy migration).
 *
 * The hashes are looked up in chunks of at most 1000 and each chunk is sent
 * as its own query. Adding every chunk's IN (...) clause to a single query
 * with andWhere() would AND disjoint hash sets together and match no row at
 * all as soon as there is more than one chunk.
 *
 * @param string[] $pathHashes
 * @return array<string, array<string, mixed>> Rows (fileid, storage, etag, mimetype, parent, path_hash) keyed by path_hash; hashes without a filecache row are absent.
 */
private function fetchFilecacheByPathHashes(array $pathHashes): array {
	if ($pathHashes === []) {
		return [];
	}

	$result = [];
	foreach (array_chunk($pathHashes, 1000) as $chunk) {
		// A fresh builder per chunk keeps the chunks independent of each other.
		$qb = $this->connection->getQueryBuilder();
		$rows = $qb->select('fileid', 'storage', 'etag', 'mimetype', 'parent', 'path_hash')
			->from('filecache')
			->where($qb->expr()->in('path_hash', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_STR_ARRAY)))
			->runAcrossAllShards()
			->executeQuery();
		try {
			while (($row = $rows->fetchAssociative()) !== false) {
				$result[$row['path_hash']] = $row;
			}
		} finally {
			// Release the cursor even if fetching throws.
			$rows->closeCursor();
		}
	}

	return $result;
}
357+
202358
/**
203359
* Recursive method that deletes the folder and its parent folders if it's not
204360
* empty.
@@ -210,10 +366,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
210366
->where($qb->expr()->eq('parent', $qb->createNamedParameter($folderId)))
211367
->setMaxResults(1)
212368
->runAcrossAllShards()
213-
->executeQuery()
214-
->fetchAssociative();
369+
->executeQuery();
370+
$row = $result->fetchAssociative();
371+
$result->closeCursor();
215372

216-
if ($result !== false) {
373+
if ($row !== false) {
217374
// there are other files in the directory, don't delete yet
218375
return;
219376
}
@@ -225,11 +382,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
225382
->where($qb->expr()->eq('fileid', $qb->createNamedParameter($folderId)))
226383
->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($storageId)))
227384
->setMaxResults(1)
228-
->executeQuery()
229-
->fetchAssociative();
230-
231-
if ($result !== false) {
232-
$parentFolderId = (int)$result['parent'];
385+
->executeQuery();
386+
$row = $result->fetchAssociative();
387+
$result->closeCursor();
388+
if ($row !== false) {
389+
$parentFolderId = (int)$row['parent'];
233390

234391
$qb = $this->connection->getQueryBuilder();
235392
$qb->delete('filecache')

0 commit comments

Comments
 (0)