fix(server): external library sync not working for large libraries (#7759)

Author: Mert
Date: 2024-03-10 22:30:57 -04:00
Committed by: GitHub
Parent: 49d9051879
Commit: 5bd597f14b
9 changed files with 106 additions and 104 deletions

Changed file: LibraryService spec

@@ -156,8 +156,7 @@ describe(LibraryService.name, () => {
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
assetMock.getPathsNotInLibrary.mockResolvedValue(['/data/user1/photo.jpg']);
assetMock.getByLibraryId.mockResolvedValue([]);
assetMock.getLibraryAssetPaths.mockResolvedValue({ items: [], hasNextPage: false });
await sut.handleQueueAssetRefresh(mockLibraryJob);
@@ -183,7 +182,7 @@ describe(LibraryService.name, () => {
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
assetMock.getByLibraryId.mockResolvedValue([]);
assetMock.getLibraryAssetPaths.mockResolvedValue({ items: [], hasNextPage: false });
await sut.handleQueueAssetRefresh(mockLibraryJob);
@@ -233,7 +232,7 @@ describe(LibraryService.name, () => {
libraryMock.get.mockResolvedValue(libraryStub.externalLibraryWithImportPaths1);
storageMock.crawl.mockResolvedValue([]);
assetMock.getByLibraryId.mockResolvedValue([]);
assetMock.getLibraryAssetPaths.mockResolvedValue({ items: [], hasNextPage: false });
await sut.handleQueueAssetRefresh(mockLibraryJob);
@@ -242,6 +241,48 @@ describe(LibraryService.name, () => {
exclusionPatterns: [],
});
});
it('should set missing assets offline', async () => {
const mockLibraryJob: ILibraryRefreshJob = {
id: libraryStub.externalLibrary1.id,
refreshModifiedFiles: false,
refreshAllFiles: false,
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue([]);
assetMock.getLibraryAssetPaths.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
await sut.handleQueueAssetRefresh(mockLibraryJob);
expect(assetMock.updateAll).toHaveBeenCalledWith([assetStub.image.id], { isOffline: true });
expect(assetMock.updateAll).not.toHaveBeenCalledWith(expect.anything(), { isOffline: false });
expect(jobMock.queueAll).not.toHaveBeenCalled();
});
it('should set crawled assets that were previously offline back online', async () => {
const mockLibraryJob: ILibraryRefreshJob = {
id: libraryStub.externalLibrary1.id,
refreshModifiedFiles: false,
refreshAllFiles: false,
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue([assetStub.offline.originalPath]);
assetMock.getLibraryAssetPaths.mockResolvedValue({
items: [assetStub.offline],
hasNextPage: false,
});
await sut.handleQueueAssetRefresh(mockLibraryJob);
expect(assetMock.updateAll).toHaveBeenCalledWith([assetStub.offline.id], { isOffline: false });
expect(assetMock.updateAll).not.toHaveBeenCalledWith(expect.anything(), { isOffline: true });
expect(jobMock.queueAll).not.toHaveBeenCalled();
});
});
describe('handleAssetRefresh', () => {
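Every new test above returns a single page ({ items, hasNextPage: false }). A multi-page case, closer to the large-library scenario this fix targets, could be mocked roughly as follows; this is an illustrative sketch reusing the stubs above, and it assumes the pagination loop keeps requesting pages while hasNextPage is true:

```ts
// Illustrative only: a two-page result for the same refresh job.
assetMock.getLibraryAssetPaths
  .mockResolvedValueOnce({ items: [assetStub.image], hasNextPage: true })
  .mockResolvedValueOnce({ items: [assetStub.offline], hasNextPage: false });

await sut.handleQueueAssetRefresh(mockLibraryJob);

// Both pages should have been consumed by the pagination loop.
expect(assetMock.getLibraryAssetPaths).toHaveBeenCalledTimes(2);
```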

Changed file: LibraryService

@@ -640,27 +640,56 @@ export class LibraryService extends EventEmitter {
.filter((validation) => validation.isValid)
.map((validation) => validation.importPath);
const rawPaths = await this.storageRepository.crawl({
let rawPaths = await this.storageRepository.crawl({
pathsToCrawl: validImportPaths,
exclusionPatterns: library.exclusionPatterns,
});
const crawledAssetPaths = rawPaths.map((filePath) => path.normalize(filePath));
const crawledAssetPaths = new Set<string>(rawPaths);
this.logger.debug(`Found ${crawledAssetPaths.length} asset(s) when crawling import paths ${library.importPaths}`);
const shouldScanAll = job.refreshAllFiles || job.refreshModifiedFiles;
let pathsToScan: string[] = shouldScanAll ? rawPaths : [];
rawPaths = [];
await this.assetRepository.updateOfflineLibraryAssets(library.id, crawledAssetPaths);
this.logger.debug(`Found ${crawledAssetPaths.size} asset(s) when crawling import paths ${library.importPaths}`);
if (crawledAssetPaths.length > 0) {
let filteredPaths: string[] = [];
if (job.refreshAllFiles || job.refreshModifiedFiles) {
filteredPaths = crawledAssetPaths;
} else {
filteredPaths = await this.assetRepository.getPathsNotInLibrary(library.id, crawledAssetPaths);
const assetIdsToMarkOffline = [];
const assetIdsToMarkOnline = [];
const pagination = usePagination(5000, (pagination) =>
this.assetRepository.getLibraryAssetPaths(pagination, library.id),
);
this.logger.debug(`Will import ${filteredPaths.length} new asset(s)`);
for await (const page of pagination) {
for (const asset of page) {
const isOffline = !crawledAssetPaths.has(asset.originalPath);
if (isOffline && !asset.isOffline) {
assetIdsToMarkOffline.push(asset.id);
}
if (!isOffline && asset.isOffline) {
assetIdsToMarkOnline.push(asset.id);
}
crawledAssetPaths.delete(asset.originalPath);
}
}
await this.scanAssets(job.id, filteredPaths, library.ownerId, job.refreshAllFiles ?? false);
if (assetIdsToMarkOffline.length > 0) {
this.logger.debug(`Found ${assetIdsToMarkOffline.length} offline asset(s) previously marked as online`);
await this.assetRepository.updateAll(assetIdsToMarkOffline, { isOffline: true });
}
if (assetIdsToMarkOnline.length > 0) {
this.logger.debug(`Found ${assetIdsToMarkOnline.length} online asset(s) previously marked as offline`);
await this.assetRepository.updateAll(assetIdsToMarkOnline, { isOffline: false });
}
if (!shouldScanAll) {
pathsToScan = [...crawledAssetPaths];
this.logger.debug(`Will import ${pathsToScan.length} new asset(s)`);
}
if (pathsToScan.length > 0) {
await this.scanAssets(job.id, pathsToScan, library.ownerId, job.refreshAllFiles ?? false);
}
await this.repository.update({ id: job.id, refreshedAt: new Date() });
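Summarizing the new flow for a default refresh (no forced rescan): the crawl result goes into a Set, the library's existing asset paths are walked page by page, offline/online flags are collected and flipped in bulk, and whatever is left in the Set is queued as new imports. Below is a condensed sketch of that flow under simplified types; the repository signature, page loop, and helper names are illustrative rather than the actual service code:

```ts
type AssetPathEntity = { id: string; originalPath: string; isOffline: boolean };

interface AssetPathRepository {
  // One page of known paths for a library; signature simplified for the sketch.
  getLibraryAssetPaths(libraryId: string, skip: number, take: number): Promise<AssetPathEntity[]>;
  updateAll(ids: string[], patch: { isOffline: boolean }): Promise<void>;
}

async function refreshLibrary(
  repo: AssetPathRepository,
  libraryId: string,
  crawledPaths: string[],
  importPaths: (paths: string[]) => Promise<void>,
  pageSize = 5000,
): Promise<void> {
  // Keep only the crawl result in memory; database rows are streamed in pages.
  const crawled = new Set(crawledPaths);
  const markOffline: string[] = [];
  const markOnline: string[] = [];

  for (let skip = 0; ; skip += pageSize) {
    const page = await repo.getLibraryAssetPaths(libraryId, skip, pageSize);
    for (const asset of page) {
      const stillOnDisk = crawled.has(asset.originalPath);
      if (!stillOnDisk && !asset.isOffline) markOffline.push(asset.id);
      if (stillOnDisk && asset.isOffline) markOnline.push(asset.id);
      // Whatever remains in the set after the loop has never been imported.
      crawled.delete(asset.originalPath);
    }
    if (page.length < pageSize) break;
  }

  if (markOffline.length > 0) await repo.updateAll(markOffline, { isOffline: true });
  if (markOnline.length > 0) await repo.updateAll(markOnline, { isOffline: false });
  if (crawled.size > 0) await importPaths([...crawled]);
}
```

Compared with the previous getByLibraryId/getPathsNotInLibrary approach, this keeps at most one page of database rows plus the crawl result in memory, which is what makes very large libraries tractable.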

Changed file: IAssetRepository interface

@@ -109,6 +109,8 @@ export interface MetadataSearchOptions {
numResults: number;
}
export type AssetPathEntity = Pick<AssetEntity, 'id' | 'originalPath' | 'isOffline'>;
export const IAssetRepository = 'IAssetRepository';
export interface IAssetRepository {
@@ -129,10 +131,8 @@ export interface IAssetRepository {
getRandom(userId: string, count: number): Promise<AssetEntity[]>;
getFirstAssetForAlbumId(albumId: string): Promise<AssetEntity | null>;
getLastUpdatedAssetForAlbumId(albumId: string): Promise<AssetEntity | null>;
getByLibraryId(libraryIds: string[]): Promise<AssetEntity[]>;
getLibraryAssetPaths(pagination: PaginationOptions, libraryId: string): Paginated<AssetPathEntity>;
getByLibraryIdAndOriginalPath(libraryId: string, originalPath: string): Promise<AssetEntity | null>;
getPathsNotInLibrary(libraryId: string, originalPaths: string[]): Promise<string[]>;
updateOfflineLibraryAssets(libraryId: string, originalPaths: string[]): Promise<void>;
deleteAll(ownerId: string): Promise<void>;
getAll(pagination: PaginationOptions, options?: AssetSearchOptions): Paginated<AssetEntity>;
getAllByDeviceId(userId: string, deviceId: string): Promise<string[]>;
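The Paginated and PaginationOptions types referenced in the new signature are not part of this diff; judging from the { items, hasNextPage } shape the spec mocks return, they plausibly look something like the following (inferred shapes, not the repo's actual definitions):

```ts
// Inferred pagination shapes; the real definitions may differ.
interface PaginationOptions {
  take: number;
  skip?: number;
}

interface PaginationResult<T> {
  items: T[];
  hasNextPage: boolean;
}

type Paginated<T> = Promise<PaginationResult<T>>;

// The entity itself, as declared above in the diff:
// type AssetPathEntity = Pick<AssetEntity, 'id' | 'originalPath' | 'isOffline'>;
```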

Changed file: AssetRepository implementation

@@ -2,6 +2,7 @@ import {
AssetBuilderOptions,
AssetCreate,
AssetExploreFieldOptions,
AssetPathEntity,
AssetSearchOptions,
AssetStats,
AssetStatsOptions,
@@ -184,10 +185,10 @@ export class AssetRepository implements IAssetRepository {
}
@GenerateSql({ params: [[DummyValue.UUID]] })
@ChunkedArray()
getByLibraryId(libraryIds: string[]): Promise<AssetEntity[]> {
return this.repository.find({
where: { library: { id: In(libraryIds) } },
getLibraryAssetPaths(pagination: PaginationOptions, libraryId: string): Paginated<AssetPathEntity> {
return paginate(this.repository, pagination, {
select: { id: true, originalPath: true, isOffline: true },
where: { library: { id: libraryId } },
});
}
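The paginate helper used here is not shown in the diff. A common way to implement such a helper over a TypeORM repository is to over-fetch by one row to learn whether another page exists; here is a sketch under that assumption (the actual helper in the codebase may work differently):

```ts
import { FindManyOptions, ObjectLiteral, Repository } from 'typeorm';

interface PaginationOptions {
  take: number;
  skip?: number;
}

// Sketch: fetch one extra row to detect a next page, then trim it off.
async function paginate<T extends ObjectLiteral>(
  repository: Repository<T>,
  { take, skip = 0 }: PaginationOptions,
  searchOptions: FindManyOptions<T> = {},
): Promise<{ items: T[]; hasNextPage: boolean }> {
  const items = await repository.find({ ...searchOptions, take: take + 1, skip });
  return { items: items.slice(0, take), hasNextPage: items.length > take };
}
```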

Changed file: FilesystemProvider

@@ -11,7 +11,7 @@ import {
import { ImmichLogger } from '@app/infra/logger';
import archiver from 'archiver';
import chokidar, { WatchOptions } from 'chokidar';
import { glob } from 'glob';
import { glob } from 'fast-glob';
import { constants, createReadStream, existsSync, mkdirSync } from 'node:fs';
import fs, { copyFile, readdir, rename, stat, utimes, writeFile } from 'node:fs/promises';
import path from 'node:path';
@@ -123,7 +123,7 @@ export class FilesystemProvider implements IStorageRepository {
crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
const { pathsToCrawl, exclusionPatterns, includeHidden } = crawlOptions;
if (!pathsToCrawl) {
if (pathsToCrawl.length === 0) {
return Promise.resolve([]);
}
@@ -132,8 +132,8 @@ export class FilesystemProvider implements IStorageRepository {
return glob(`${base}/**/${extensions}`, {
absolute: true,
nocase: true,
nodir: true,
caseSensitiveMatch: false,
onlyFiles: true,
dot: includeHidden,
ignore: exclusionPatterns,
});
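For reference, the glob → fast-glob option mapping used above is nocase: true → caseSensitiveMatch: false and nodir: true → onlyFiles: true. A minimal, self-contained crawl sketch with those options follows; the pattern construction and parameter names are illustrative, not the provider's exact code:

```ts
import { glob } from 'fast-glob';

// Illustrative crawl; `extensions` would be a brace pattern such as '*.{jpg,png,heic}'.
async function crawl(
  pathsToCrawl: string[],
  extensions: string,
  exclusionPatterns: string[] = [],
  includeHidden = false,
): Promise<string[]> {
  if (pathsToCrawl.length === 0) {
    return [];
  }

  // Multiple roots can be collapsed into one brace-expanded pattern.
  const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;

  return glob(`${base}/**/${extensions}`, {
    absolute: true,
    caseSensitiveMatch: false, // was `nocase: true` with the glob package
    onlyFiles: true,           // was `nodir: true`
    dot: includeHidden,
    ignore: exclusionPatterns,
  });
}
```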

Changed file: generated SQL for AssetRepository

@@ -293,53 +293,6 @@ DELETE FROM "assets"
WHERE
"ownerId" = $1
-- AssetRepository.getByLibraryId
SELECT
"AssetEntity"."id" AS "AssetEntity_id",
"AssetEntity"."deviceAssetId" AS "AssetEntity_deviceAssetId",
"AssetEntity"."ownerId" AS "AssetEntity_ownerId",
"AssetEntity"."libraryId" AS "AssetEntity_libraryId",
"AssetEntity"."deviceId" AS "AssetEntity_deviceId",
"AssetEntity"."type" AS "AssetEntity_type",
"AssetEntity"."originalPath" AS "AssetEntity_originalPath",
"AssetEntity"."resizePath" AS "AssetEntity_resizePath",
"AssetEntity"."webpPath" AS "AssetEntity_webpPath",
"AssetEntity"."thumbhash" AS "AssetEntity_thumbhash",
"AssetEntity"."encodedVideoPath" AS "AssetEntity_encodedVideoPath",
"AssetEntity"."createdAt" AS "AssetEntity_createdAt",
"AssetEntity"."updatedAt" AS "AssetEntity_updatedAt",
"AssetEntity"."deletedAt" AS "AssetEntity_deletedAt",
"AssetEntity"."fileCreatedAt" AS "AssetEntity_fileCreatedAt",
"AssetEntity"."localDateTime" AS "AssetEntity_localDateTime",
"AssetEntity"."fileModifiedAt" AS "AssetEntity_fileModifiedAt",
"AssetEntity"."isFavorite" AS "AssetEntity_isFavorite",
"AssetEntity"."isArchived" AS "AssetEntity_isArchived",
"AssetEntity"."isExternal" AS "AssetEntity_isExternal",
"AssetEntity"."isReadOnly" AS "AssetEntity_isReadOnly",
"AssetEntity"."isOffline" AS "AssetEntity_isOffline",
"AssetEntity"."checksum" AS "AssetEntity_checksum",
"AssetEntity"."duration" AS "AssetEntity_duration",
"AssetEntity"."isVisible" AS "AssetEntity_isVisible",
"AssetEntity"."livePhotoVideoId" AS "AssetEntity_livePhotoVideoId",
"AssetEntity"."originalFileName" AS "AssetEntity_originalFileName",
"AssetEntity"."sidecarPath" AS "AssetEntity_sidecarPath",
"AssetEntity"."stackId" AS "AssetEntity_stackId"
FROM
"assets" "AssetEntity"
LEFT JOIN "libraries" "AssetEntity__AssetEntity_library" ON "AssetEntity__AssetEntity_library"."id" = "AssetEntity"."libraryId"
AND (
"AssetEntity__AssetEntity_library"."deletedAt" IS NULL
)
WHERE
(
(
(
(("AssetEntity__AssetEntity_library"."id" IN ($1)))
)
)
)
AND ("AssetEntity"."deletedAt" IS NULL)
-- AssetRepository.getByLibraryIdAndOriginalPath
SELECT DISTINCT
"distinctAlias"."AssetEntity_id" AS "ids_AssetEntity_id"