1616use OC \Preview \Db \Preview ;
1717use OC \Preview \Db \PreviewMapper ;
1818use OCP \DB \Exception ;
19+ use OCP \DB \QueryBuilder \IQueryBuilder ;
1920use OCP \Files \IMimeTypeDetector ;
2021use OCP \Files \IMimeTypeLoader ;
2122use OCP \Files \IRootFolder ;
3031use RecursiveIteratorIterator ;
3132
3233class LocalPreviewStorage implements IPreviewStorage {
34+ private const SCAN_BATCH_SIZE = 1000 ;
35+
3336 public function __construct (
3437 private readonly IConfig $ config ,
3538 private readonly PreviewMapper $ previewMapper ,
@@ -117,88 +120,241 @@ public function scan(): int {
117120 if (!file_exists ($ this ->getPreviewRootFolder ())) {
118121 return 0 ;
119122 }
123+
120124 $ scanner = new RecursiveDirectoryIterator ($ this ->getPreviewRootFolder ());
121125 $ previewsFound = 0 ;
122- $ skipFiles = [];
126+
127+ /**
128+ * Use an associative array keyed by path for O(1) lookup instead of
129+ * the O(n) in_array() the original code used.
130+ *
131+ * @var array<string, true> $skipPaths
132+ */
133+ $ skipPaths = [];
134+
135+ /**
136+ * Pending previews grouped by fileId. A single original file can have
137+ * many preview variants (different sizes/formats), so we group them to
138+ * issue one filecache lookup per original file rather than one per
139+ * preview variant.
140+ *
141+ * @var array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
142+ */
143+ $ pendingByFileId = [];
144+
145+ /**
146+ * path_hash => realPath for legacy filecache entries that need to be
147+ * cleaned up. Only populated when $checkForFileCache is true.
148+ *
149+ * @var array<string, string> $pendingPathHashes
150+ */
151+ $ pendingPathHashes = [];
152+ $ pendingCount = 0 ;
153+
123154 foreach (new RecursiveIteratorIterator ($ scanner ) as $ file ) {
124- if ($ file ->isFile () && !in_array ((string )$ file , $ skipFiles , true )) {
125- $ preview = Preview::fromPath ((string )$ file , $ this ->mimeTypeDetector );
126- if ($ preview === false ) {
127- $ this ->logger ->error ('Unable to parse preview information for ' . $ file ->getRealPath ());
128- continue ;
129- }
155+ if (!$ file ->isFile ()) {
156+ continue ;
157+ }
158+
159+ $ filePath = $ file ->getPathname ();
160+ if (isset ($ skipPaths [$ filePath ])) {
161+ continue ;
162+ }
163+
164+ $ preview = Preview::fromPath ($ filePath , $ this ->mimeTypeDetector );
165+ if ($ preview === false ) {
166+ $ this ->logger ->error ('Unable to parse preview information for ' . $ file ->getRealPath ());
167+ continue ;
168+ }
169+
170+ $ preview ->setSize ($ file ->getSize ());
171+ $ preview ->setMtime ($ file ->getMtime ());
172+ $ preview ->setEncrypted (false );
173+
174+ $ realPath = $ file ->getRealPath ();
175+ $ pendingByFileId [$ preview ->getFileId ()][] = [
176+ 'preview ' => $ preview ,
177+ 'filePath ' => $ filePath ,
178+ 'realPath ' => $ realPath ,
179+ ];
180+ $ pendingCount ++;
181+
182+ if ($ checkForFileCache ) {
183+ $ relativePath = str_replace ($ this ->getRootFolder () . '/ ' , '' , $ realPath );
184+ $ pendingPathHashes [md5 ($ relativePath )] = $ realPath ;
185+ }
186+
187+ if ($ pendingCount >= self ::SCAN_BATCH_SIZE ) {
188+ $ this ->connection ->beginTransaction ();
130189 try {
131- $ preview ->setSize ($ file ->getSize ());
132- $ preview ->setMtime ($ file ->getMtime ());
133- $ preview ->setEncrypted (false );
134-
135- $ qb = $ this ->connection ->getQueryBuilder ();
136- $ result = $ qb ->select ('storage ' , 'etag ' , 'mimetype ' )
137- ->from ('filecache ' )
138- ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ preview ->getFileId ())))
139- ->setMaxResults (1 )
140- ->runAcrossAllShards () // Unavoidable because we can't extract the storage_id from the preview name
141- ->executeQuery ()
142- ->fetchAssociative ();
143-
144- if ($ result === false ) {
145- // original file is deleted
146- $ this ->logger ->warning ('Original file ' . $ preview ->getFileId () . ' was not found. Deleting preview at ' . $ file ->getRealPath ());
147- @unlink ($ file ->getRealPath ());
148- continue ;
149- }
190+ $ previewsFound += $ this ->processScanBatch ($ pendingByFileId , $ pendingPathHashes , $ checkForFileCache , $ skipPaths );
191+ $ this ->connection ->commit ();
192+ } catch (\Exception $ e ) {
193+ $ this ->connection ->rollBack ();
194+ $ this ->logger ->error ($ e ->getMessage (), ['exception ' => $ e ]);
195+ throw $ e ;
196+ }
197+ $ pendingByFileId = [];
198+ $ pendingPathHashes = [];
199+ $ pendingCount = 0 ;
200+ }
201+ }
202+
203+ if ($ pendingCount > 0 ) {
204+ $ this ->connection ->beginTransaction ();
205+ try {
206+ $ previewsFound += $ this ->processScanBatch ($ pendingByFileId , $ pendingPathHashes , $ checkForFileCache , $ skipPaths );
207+ $ this ->connection ->commit ();
208+ } catch (\Exception $ e ) {
209+ $ this ->connection ->rollBack ();
210+ $ this ->logger ->error ($ e ->getMessage (), ['exception ' => $ e ]);
211+ throw $ e ;
212+ }
213+ }
214+
215+ return $ previewsFound ;
216+ }
217+
/**
 * Process one batch of preview files collected during scan().
 *
 * For every original file id in the batch, the matching filecache row is
 * looked up in bulk. Previews whose original file has no filecache row are
 * deleted from disk; all other previews are completed with the storage id,
 * etag and source mimetype of the original, inserted into the preview table
 * and, when they are not yet stored in the nested directory layout, moved
 * to the path returned by constructPath().
 *
 * When $checkForFileCache is true, filecache rows whose path_hash matches a
 * preview file of this batch are deleted, followed by a call to
 * deleteParentsFromFileCache() for their parent folder.
 *
 * @param array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
 * @param array<string, string> $pendingPathHashes path_hash => realPath
 * @param bool $checkForFileCache Whether legacy filecache entries of the previews must be cleaned up.
 * @param array<string, true> $skipPaths Modified in place: newly-moved paths are added so the outer iterator skips them.
 * @return int Number of previews in the batch whose original file still exists
 *             (previews already present in the database are counted too).
 * @throws \Throwable Anything raised while inserting a preview, except a unique constraint violation.
 * @throws LogicException When a preview file cannot be moved to its new location.
 */
private function processScanBatch(
	array $pendingByFileId,
	array $pendingPathHashes,
	bool $checkForFileCache,
	array &$skipPaths,
): int {
	$filecacheByFileId = $this->fetchFilecacheByFileIds(array_keys($pendingByFileId));
	$legacyByPathHash = [];
	if ($checkForFileCache && $pendingPathHashes !== []) {
		$legacyByPathHash = $this->fetchFilecacheByPathHashes(array_keys($pendingPathHashes));
	}

	$previewsFound = 0;
	foreach ($pendingByFileId as $fileId => $items) {
		if (!isset($filecacheByFileId[$fileId])) {
			// Original file has been deleted - clean up all its previews.
			foreach ($items as $item) {
				$this->logger->warning('Original file ' . $fileId . ' was not found. Deleting preview at ' . $item['realPath']);
				// Best effort: a preview that cannot be removed is simply left behind.
				@unlink($item['realPath']);
			}
			continue;
		}

		$filecacheRow = $filecacheByFileId[$fileId];
		foreach ($items as $item) {
			$preview = $item['preview'];

			if ($checkForFileCache) {
				// Same hash scan() used as key of $pendingPathHashes.
				$relativePath = str_replace($this->getRootFolder() . '/', '', $item['realPath']);
				$pathHash = md5($relativePath);
				if (isset($legacyByPathHash[$pathHash])) {
					$legacyRow = $legacyByPathHash[$pathHash];
					$qb = $this->connection->getQueryBuilder();
					$qb->delete('filecache')
						->where($qb->expr()->eq('fileid', $qb->createNamedParameter($legacyRow['fileid'])))
						->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($legacyRow['storage'])))
						->executeStatement();
					$this->deleteParentsFromFileCache((int)$legacyRow['parent'], (int)$legacyRow['storage']);
				}
			}

			$preview->setStorageId((int)$filecacheRow['storage']);
			$preview->setEtag($filecacheRow['etag']);
			$preview->setSourceMimetype($this->mimeTypeLoader->getMimetypeById((int)$filecacheRow['mimetype']));
			$preview->generateId();

			// The insert runs in its own (nested) transaction so that a failed
			// insert can be undone on its own.
			// NOTE(review): scan() already opens a transaction around the batch;
			// this presumably relies on the connection nesting transactions via
			// savepoints - confirm.
			$this->connection->beginTransaction();
			try {
				$this->previewMapper->insert($preview);
				$this->connection->commit();
			} catch (\Throwable $e) {
				// Always close the transaction level opened above, whatever was
				// thrown; otherwise the caller's rollBack() would only unwind this
				// level and leave the outer transaction open.
				$this->connection->rollBack();
				$alreadyInDb = $e instanceof Exception
					&& $e->getReason() === Exception::REASON_UNIQUE_CONSTRAINT_VIOLATION;
				if (!$alreadyInDb) {
					throw $e;
				}
				// Unique constraint violation: the preview is already in the DB.
			}

			// Move old flat preview to new nested directory format.
			// NOTE(review): the character class stops at 'e'; if the nested
			// directories are named after hex digits, paths containing 'f' never
			// match and are handed to rename() again - confirm.
			$dirName = str_replace($this->getPreviewRootFolder(), '', $item['filePath']);
			if (preg_match('/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9]+/', $dirName) !== 1) {
				$previewPath = $this->constructPath($preview);
				$this->createParentFiles($previewPath);
				$ok = rename($item['realPath'], $previewPath);
				if (!$ok) {
					throw new LogicException('Failed to move ' . $item['realPath'] . ' to ' . $previewPath);
				}
				// Mark the destination so the outer iterator skips it if it encounters the path later.
				$skipPaths[$previewPath] = true;
			}

			$previewsFound++;
		}
	}

	return $previewsFound;
}
201301
/**
 * Bulk-fetch filecache rows for a set of fileIds.
 *
 * The ids are looked up in chunks of at most 1000, one query per chunk, and
 * the rows of all chunks are merged. The chunks must not be combined into a
 * single query with andWhere(): "fileid IN (chunk 1) AND fileid IN (chunk 2)"
 * can never match, so every id would be reported as missing.
 *
 * @param int[] $fileIds
 * @return array<int, array<string, mixed>> Rows (fileid, storage, etag, mimetype) keyed by fileid.
 *                                          Ids without a filecache row are absent from the result.
 */
private function fetchFilecacheByFileIds(array $fileIds): array {
	if (empty($fileIds)) {
		return [];
	}

	$result = [];
	foreach (array_chunk($fileIds, 1000) as $chunk) {
		$qb = $this->connection->getQueryBuilder();
		$qb->select('fileid', 'storage', 'etag', 'mimetype')
			->from('filecache')
			->where(
				$qb->expr()->in('fileid', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_INT_ARRAY))
			);
		$rows = $qb->runAcrossAllShards()
			->executeQuery();
		try {
			while ($row = $rows->fetchAssociative()) {
				// If several rows share a fileid, the last one fetched wins.
				$result[(int)$row['fileid']] = $row;
			}
		} finally {
			$rows->closeCursor();
		}
	}
	return $result;
}
329+
/**
 * Bulk-fetch filecache rows for a set of path_hashes (legacy migration).
 *
 * The hashes are looked up in chunks of at most 1000, one query per chunk,
 * and the rows of all chunks are merged. The chunks must not be combined
 * into a single query with andWhere(): a row cannot satisfy
 * "path_hash IN (chunk 1) AND path_hash IN (chunk 2)", so nothing would be
 * returned as soon as more than one chunk is needed.
 *
 * @param string[] $pathHashes
 * @return array<string, array<string, mixed>> Rows (fileid, storage, etag, mimetype, parent, path_hash)
 *                                             keyed by path_hash. Hashes without a row are absent.
 */
private function fetchFilecacheByPathHashes(array $pathHashes): array {
	if (empty($pathHashes)) {
		return [];
	}

	$result = [];
	foreach (array_chunk($pathHashes, 1000) as $chunk) {
		$qb = $this->connection->getQueryBuilder();
		$qb->select('fileid', 'storage', 'etag', 'mimetype', 'parent', 'path_hash')
			->from('filecache')
			->where(
				$qb->expr()->in('path_hash', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_STR_ARRAY))
			);
		$rows = $qb->runAcrossAllShards()
			->executeQuery();
		try {
			while ($row = $rows->fetchAssociative()) {
				// If several rows share a path_hash, the last one fetched wins.
				$result[$row['path_hash']] = $row;
			}
		} finally {
			$rows->closeCursor();
		}
	}
	return $result;
}
357+
202358 /**
203359 * Recursive method that deletes the folder and its parent folders if it's not
204360 * empty.
@@ -210,10 +366,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
210366 ->where ($ qb ->expr ()->eq ('parent ' , $ qb ->createNamedParameter ($ folderId )))
211367 ->setMaxResults (1 )
212368 ->runAcrossAllShards ()
213- ->executeQuery ()
214- ->fetchAssociative ();
369+ ->executeQuery ();
370+ $ row = $ result ->fetchAssociative ();
371+ $ result ->closeCursor ();
215372
216- if ($ result !== false ) {
373+ if ($ row !== false ) {
217374 // there are other files in the directory, don't delete yet
218375 return ;
219376 }
@@ -225,11 +382,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
225382 ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ folderId )))
226383 ->andWhere ($ qb ->expr ()->eq ('storage ' , $ qb ->createNamedParameter ($ storageId )))
227384 ->setMaxResults (1 )
228- ->executeQuery ()
229- ->fetchAssociative ();
230-
231- if ($ result !== false ) {
232- $ parentFolderId = (int )$ result ['parent ' ];
385+ ->executeQuery ();
386+ $ row = $ result ->fetchAssociative ();
387+ $ result -> closeCursor ();
388+ if ($ row !== false ) {
389+ $ parentFolderId = (int )$ row ['parent ' ];
233390
234391 $ qb = $ this ->connection ->getQueryBuilder ();
235392 $ qb ->delete ('filecache ' )
0 commit comments