1616use OC \Preview \Db \Preview ;
1717use OC \Preview \Db \PreviewMapper ;
1818use OCP \DB \Exception ;
19- use OCP \DB \QueryBuilder \IQueryBuilder ;
2019use OCP \Files \IMimeTypeDetector ;
2120use OCP \Files \IMimeTypeLoader ;
2221use OCP \Files \IRootFolder ;
3130use RecursiveIteratorIterator ;
3231
3332class LocalPreviewStorage implements IPreviewStorage {
34- private const SCAN_BATCH_SIZE = 1000 ;
35-
3633 public function __construct (
3734 private readonly IConfig $ config ,
3835 private readonly PreviewMapper $ previewMapper ,
@@ -120,241 +117,88 @@ public function scan(): int {
120117 if (!file_exists ($ this ->getPreviewRootFolder ())) {
121118 return 0 ;
122119 }
123-
124120 $ scanner = new RecursiveDirectoryIterator ($ this ->getPreviewRootFolder ());
125121 $ previewsFound = 0 ;
126-
127- /**
128- * Use an associative array keyed by path for O(1) lookup instead of
129- * the O(n) in_array() the original code used.
130- *
131- * @var array<string, true> $skipPaths
132- */
133- $ skipPaths = [];
134-
135- /**
136- * Pending previews grouped by fileId. A single original file can have
137- * many preview variants (different sizes/formats), so we group them to
138- * issue one filecache lookup per original file rather than one per
139- * preview variant.
140- *
141- * @var array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
142- */
143- $ pendingByFileId = [];
144-
145- /**
146- * path_hash => realPath for legacy filecache entries that need to be
147- * cleaned up. Only populated when $checkForFileCache is true.
148- *
149- * @var array<string, string> $pendingPathHashes
150- */
151- $ pendingPathHashes = [];
152- $ pendingCount = 0 ;
153-
122+ $ skipFiles = [];
154123 foreach (new RecursiveIteratorIterator ($ scanner ) as $ file ) {
155- if (!$ file ->isFile ()) {
156- continue ;
157- }
158-
159- $ filePath = $ file ->getPathname ();
160- if (isset ($ skipPaths [$ filePath ])) {
161- continue ;
162- }
163-
164- $ preview = Preview::fromPath ($ filePath , $ this ->mimeTypeDetector );
165- if ($ preview === false ) {
166- $ this ->logger ->error ('Unable to parse preview information for ' . $ file ->getRealPath ());
167- continue ;
168- }
169-
170- $ preview ->setSize ($ file ->getSize ());
171- $ preview ->setMtime ($ file ->getMtime ());
172- $ preview ->setEncrypted (false );
173-
174- $ realPath = $ file ->getRealPath ();
175- $ pendingByFileId [$ preview ->getFileId ()][] = [
176- 'preview ' => $ preview ,
177- 'filePath ' => $ filePath ,
178- 'realPath ' => $ realPath ,
179- ];
180- $ pendingCount ++;
181-
182- if ($ checkForFileCache ) {
183- $ relativePath = str_replace ($ this ->getRootFolder () . '/ ' , '' , $ realPath );
184- $ pendingPathHashes [md5 ($ relativePath )] = $ realPath ;
185- }
186-
187- if ($ pendingCount >= self ::SCAN_BATCH_SIZE ) {
188- $ this ->connection ->beginTransaction ();
189- try {
190- $ previewsFound += $ this ->processScanBatch ($ pendingByFileId , $ pendingPathHashes , $ checkForFileCache , $ skipPaths );
191- $ this ->connection ->commit ();
192- } catch (\Exception $ e ) {
193- $ this ->connection ->rollBack ();
194- $ this ->logger ->error ($ e ->getMessage (), ['exception ' => $ e ]);
195- throw $ e ;
196- }
197- $ pendingByFileId = [];
198- $ pendingPathHashes = [];
199- $ pendingCount = 0 ;
200- }
201- }
202-
203- if ($ pendingCount > 0 ) {
204- $ this ->connection ->beginTransaction ();
205- try {
206- $ previewsFound += $ this ->processScanBatch ($ pendingByFileId , $ pendingPathHashes , $ checkForFileCache , $ skipPaths );
207- $ this ->connection ->commit ();
208- } catch (\Exception $ e ) {
209- $ this ->connection ->rollBack ();
210- $ this ->logger ->error ($ e ->getMessage (), ['exception ' => $ e ]);
211- throw $ e ;
212- }
213- }
214-
215- return $ previewsFound ;
216- }
217-
218- /**
219- * Process one batch of preview files collected during scan().
220- *
221- * @param array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
222- * @param array<string, string> $pendingPathHashes path_hash => realPath
223- * @param array<string, true> $skipPaths Modified in place: newly-moved paths are added so the outer iterator skips them.
224- */
225- private function processScanBatch (
226- array $ pendingByFileId ,
227- array $ pendingPathHashes ,
228- bool $ checkForFileCache ,
229- array &$ skipPaths ,
230- ): int {
231- $ filecacheByFileId = $ this ->fetchFilecacheByFileIds (array_keys ($ pendingByFileId ));
232- $ legacyByPathHash = [];
233- if ($ checkForFileCache && $ pendingPathHashes !== []) {
234- $ legacyByPathHash = $ this ->fetchFilecacheByPathHashes (array_keys ($ pendingPathHashes ));
235- }
236-
237- $ previewsFound = 0 ;
238- foreach ($ pendingByFileId as $ fileId => $ items ) {
239- if (!isset ($ filecacheByFileId [$ fileId ])) {
240- // Original file has been deleted – clean up all its previews.
241- foreach ($ items as $ item ) {
242- $ this ->logger ->warning ('Original file ' . $ fileId . ' was not found. Deleting preview at ' . $ item ['realPath ' ]);
243- @unlink ($ item ['realPath ' ]);
124+ if ($ file ->isFile () && !in_array ((string )$ file , $ skipFiles , true )) {
125+ $ preview = Preview::fromPath ((string )$ file , $ this ->mimeTypeDetector );
126+ if ($ preview === false ) {
127+ $ this ->logger ->error ('Unable to parse preview information for ' . $ file ->getRealPath ());
128+ continue ;
244129 }
245- continue ;
246- }
247-
248- $ filecacheRow = $ filecacheByFileId [$ fileId ];
249- foreach ($ items as $ item ) {
250- $ preview = $ item ['preview ' ];
130+ try {
131+ $ preview ->setSize ($ file ->getSize ());
132+ $ preview ->setMtime ($ file ->getMtime ());
133+ $ preview ->setEncrypted (false );
134+
135+ $ qb = $ this ->connection ->getQueryBuilder ();
136+ $ result = $ qb ->select ('storage ' , 'etag ' , 'mimetype ' )
137+ ->from ('filecache ' )
138+ ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ preview ->getFileId ())))
139+ ->setMaxResults (1 )
140+ ->runAcrossAllShards () // Unavoidable because we can't extract the storage_id from the preview name
141+ ->executeQuery ()
142+ ->fetchAssociative ();
143+
144+ if ($ result === false ) {
145+ // original file is deleted
146+ $ this ->logger ->warning ('Original file ' . $ preview ->getFileId () . ' was not found. Deleting preview at ' . $ file ->getRealPath ());
147+ @unlink ($ file ->getRealPath ());
148+ continue ;
149+ }
251150
252- if ($ checkForFileCache ) {
253- $ relativePath = str_replace ($ this ->getRootFolder () . '/ ' , '' , $ item ['realPath ' ]);
254- $ pathHash = md5 ($ relativePath );
255- if (isset ($ legacyByPathHash [$ pathHash ])) {
256- $ legacyRow = $ legacyByPathHash [$ pathHash ];
151+ if ($ checkForFileCache ) {
152+ $ relativePath = str_replace ($ this ->getRootFolder () . '/ ' , '' , $ file ->getRealPath ());
257153 $ qb = $ this ->connection ->getQueryBuilder ();
258- $ qb ->delete ('filecache ' )
259- ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ legacyRow ['fileid ' ])))
260- ->andWhere ($ qb ->expr ()->eq ('storage ' , $ qb ->createNamedParameter ($ legacyRow ['storage ' ])))
261- ->executeStatement ();
262- $ this ->deleteParentsFromFileCache ((int )$ legacyRow ['parent ' ], (int )$ legacyRow ['storage ' ]);
154+ $ result2 = $ qb ->select ('fileid ' , 'storage ' , 'etag ' , 'mimetype ' , 'parent ' )
155+ ->from ('filecache ' )
156+ ->where ($ qb ->expr ()->eq ('path_hash ' , $ qb ->createNamedParameter (md5 ($ relativePath ))))
157+ ->runAcrossAllShards ()
158+ ->setMaxResults (1 )
159+ ->executeQuery ()
160+ ->fetchAssociative ();
161+
162+ if ($ result2 !== false ) {
163+ $ qb ->delete ('filecache ' )
164+ ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ result2 ['fileid ' ])))
165+ ->andWhere ($ qb ->expr ()->eq ('storage ' , $ qb ->createNamedParameter ($ result2 ['storage ' ])))
166+ ->executeStatement ();
167+ $ this ->deleteParentsFromFileCache ((int )$ result2 ['parent ' ], (int )$ result2 ['storage ' ]);
168+ }
263169 }
264- }
265-
266- $ preview ->setStorageId ((int )$ filecacheRow ['storage ' ]);
267- $ preview ->setEtag ($ filecacheRow ['etag ' ]);
268- $ preview ->setSourceMimetype ($ this ->mimeTypeLoader ->getMimetypeById ((int )$ filecacheRow ['mimetype ' ]));
269- $ preview ->generateId ();
270170
271- $ this ->connection ->beginTransaction ();
272- try {
171+ $ preview ->setStorageId ((int )$ result ['storage ' ]);
172+ $ preview ->setEtag ($ result ['etag ' ]);
173+ $ preview ->setSourceMimetype ($ this ->mimeTypeLoader ->getMimetypeById ((int )$ result ['mimetype ' ]));
174+ $ preview ->generateId ();
175+ // try to insert, if that fails the preview is already in the DB
273176 $ this ->previewMapper ->insert ($ preview );
274- $ this ->connection ->commit ();
177+
178+ // Move old flat preview to new format
179+ $ dirName = str_replace ($ this ->getPreviewRootFolder (), '' , $ file ->getPath ());
180+ if (preg_match ('/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9]+/ ' , $ dirName ) !== 1 ) {
181+ $ previewPath = $ this ->constructPath ($ preview );
182+ $ this ->createParentFiles ($ previewPath );
183+ $ ok = rename ($ file ->getRealPath (), $ previewPath );
184+ if (!$ ok ) {
185+ throw new LogicException ('Failed to move ' . $ file ->getRealPath () . ' to ' . $ previewPath );
186+ }
187+
188+ $ skipFiles [] = $ previewPath ;
189+ }
275190 } catch (Exception $ e ) {
276- $ this ->connection ->rollBack ();
277191 if ($ e ->getReason () !== Exception::REASON_UNIQUE_CONSTRAINT_VIOLATION ) {
278192 throw $ e ;
279193 }
280194 }
281-
282- // Move old flat preview to new nested directory format.
283- $ dirName = str_replace ($ this ->getPreviewRootFolder (), '' , $ item ['filePath ' ]);
284- if (preg_match ('/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9]+/ ' , $ dirName ) !== 1 ) {
285- $ previewPath = $ this ->constructPath ($ preview );
286- $ this ->createParentFiles ($ previewPath );
287- $ ok = rename ($ item ['realPath ' ], $ previewPath );
288- if (!$ ok ) {
289- throw new LogicException ('Failed to move ' . $ item ['realPath ' ] . ' to ' . $ previewPath );
290- }
291- // Mark the destination so the outer iterator skips it if it encounters the path later.
292- $ skipPaths [$ previewPath ] = true ;
293- }
294-
295195 $ previewsFound ++;
296196 }
297197 }
298198
299199 return $ previewsFound ;
300200 }
301201
302- /**
303- * Bulk-fetch filecache rows for a set of fileIds.
304- *
305- * @param int[] $fileIds
306- */
307- private function fetchFilecacheByFileIds (array $ fileIds ): array {
308- if (empty ($ fileIds )) {
309- return [];
310- }
311-
312- $ result = [];
313- $ qb = $ this ->connection ->getQueryBuilder ();
314- $ qb ->select ('fileid ' , 'storage ' , 'etag ' , 'mimetype ' )
315- ->from ('filecache ' );
316- foreach (array_chunk ($ fileIds , 1000 ) as $ chunk ) {
317- $ qb ->andWhere (
318- $ qb ->expr ()->in ('fileid ' , $ qb ->createNamedParameter ($ chunk , IQueryBuilder::PARAM_INT_ARRAY ))
319- );
320- }
321- $ rows = $ qb ->runAcrossAllShards ()
322- ->executeQuery ();
323- while ($ row = $ rows ->fetchAssociative ()) {
324- $ result [(int )$ row ['fileid ' ]] = $ row ;
325- }
326- $ rows ->closeCursor ();
327- return $ result ;
328- }
329-
330- /**
331- * Bulk-fetch filecache rows for a set of path_hashes (legacy migration).
332- *
333- * @param string[] $pathHashes
334- */
335- private function fetchFilecacheByPathHashes (array $ pathHashes ): array {
336- if (empty ($ pathHashes )) {
337- return [];
338- }
339-
340- $ result = [];
341- $ qb = $ this ->connection ->getQueryBuilder ();
342- $ qb ->select ('fileid ' , 'storage ' , 'etag ' , 'mimetype ' , 'parent ' , 'path_hash ' )
343- ->from ('filecache ' );
344- foreach (array_chunk ($ pathHashes , 1000 ) as $ chunk ) {
345- $ qb ->andWhere (
346- $ qb ->expr ()->in ('path_hash ' , $ qb ->createNamedParameter ($ chunk , IQueryBuilder::PARAM_STR_ARRAY ))
347- );
348- }
349- $ rows = $ qb ->runAcrossAllShards ()
350- ->executeQuery ();
351- while ($ row = $ rows ->fetchAssociative ()) {
352- $ result [$ row ['path_hash ' ]] = $ row ;
353- }
354- $ rows ->closeCursor ();
355- return $ result ;
356- }
357-
358202 /**
359203 * Recursive method that deletes the folder and its parent folders if it's not
360204 * empty.
@@ -366,11 +210,10 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
366210 ->where ($ qb ->expr ()->eq ('parent ' , $ qb ->createNamedParameter ($ folderId )))
367211 ->setMaxResults (1 )
368212 ->runAcrossAllShards ()
369- ->executeQuery ();
370- $ row = $ result ->fetchAssociative ();
371- $ result ->closeCursor ();
213+ ->executeQuery ()
214+ ->fetchAssociative ();
372215
373- if ($ row !== false ) {
216+ if ($ result !== false ) {
374217 // there are other files in the directory, don't delete yet
375218 return ;
376219 }
@@ -382,11 +225,11 @@ private function deleteParentsFromFileCache(int $folderId, int $storageId): void
382225 ->where ($ qb ->expr ()->eq ('fileid ' , $ qb ->createNamedParameter ($ folderId )))
383226 ->andWhere ($ qb ->expr ()->eq ('storage ' , $ qb ->createNamedParameter ($ storageId )))
384227 ->setMaxResults (1 )
385- ->executeQuery ();
386- $ row = $ result ->fetchAssociative ();
387- $ result -> closeCursor ();
388- if ($ row !== false ) {
389- $ parentFolderId = (int )$ row ['parent ' ];
228+ ->executeQuery ()
229+ ->fetchAssociative ();
230+
231+ if ($ result !== false ) {
232+ $ parentFolderId = (int )$ result ['parent ' ];
390233
391234 $ qb = $ this ->connection ->getQueryBuilder ();
392235 $ qb ->delete ('filecache ' )
0 commit comments