refactor: duplicate queries (#19136)

This commit is contained in:
Jason Rasmussen
2025-06-12 14:23:02 -04:00
committed by GitHub
parent 144cc8ab6d
commit 5cd186d3d4
14 changed files with 280 additions and 270 deletions

View File

@@ -18,6 +18,7 @@ import { CronRepository } from 'src/repositories/cron.repository';
import { CryptoRepository } from 'src/repositories/crypto.repository';
import { DatabaseRepository } from 'src/repositories/database.repository';
import { DownloadRepository } from 'src/repositories/download.repository';
import { DuplicateRepository } from 'src/repositories/duplicate.repository';
import { EmailRepository } from 'src/repositories/email.repository';
import { EventRepository } from 'src/repositories/event.repository';
import { JobRepository } from 'src/repositories/job.repository';
@@ -71,6 +72,7 @@ export class BaseService {
protected cryptoRepository: CryptoRepository,
protected databaseRepository: DatabaseRepository,
protected downloadRepository: DownloadRepository,
protected duplicateRepository: DuplicateRepository,
protected emailRepository: EmailRepository,
protected eventRepository: EventRepository,
protected jobRepository: JobRepository,

View File

@@ -38,7 +38,7 @@ describe(SearchService.name, () => {
describe('getDuplicates', () => {
it('should get duplicates', async () => {
mocks.asset.getDuplicates.mockResolvedValue([
mocks.duplicateRepository.getAll.mockResolvedValue([
{
duplicateId: 'duplicate-id',
assets: [assetStub.image, assetStub.image],
@@ -218,25 +218,26 @@ describe(SearchService.name, () => {
it('should search for duplicates and update asset with duplicateId', async () => {
mocks.assetJob.getForSearchDuplicatesJob.mockResolvedValue(hasEmbedding);
mocks.search.searchDuplicates.mockResolvedValue([
mocks.duplicateRepository.search.mockResolvedValue([
{ assetId: assetStub.image.id, distance: 0.01, duplicateId: null },
]);
mocks.duplicateRepository.merge.mockResolvedValue();
const expectedAssetIds = [assetStub.image.id, hasEmbedding.id];
const result = await sut.handleSearchDuplicates({ id: hasEmbedding.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(mocks.search.searchDuplicates).toHaveBeenCalledWith({
expect(mocks.duplicateRepository.search).toHaveBeenCalledWith({
assetId: hasEmbedding.id,
embedding: hasEmbedding.embedding,
maxDistance: 0.01,
type: hasEmbedding.type,
userIds: [hasEmbedding.ownerId],
});
expect(mocks.asset.updateDuplicates).toHaveBeenCalledWith({
expect(mocks.duplicateRepository.merge).toHaveBeenCalledWith({
assetIds: expectedAssetIds,
targetDuplicateId: expect.any(String),
duplicateIds: [],
targetId: expect.any(String),
sourceIds: [],
});
expect(mocks.asset.upsertJobStatus).toHaveBeenCalledWith(
...expectedAssetIds.map((assetId) => ({ assetId, duplicatesDetectedAt: expect.any(Date) })),
@@ -246,23 +247,24 @@ describe(SearchService.name, () => {
it('should use existing duplicate ID among matched duplicates', async () => {
const duplicateId = hasDupe.duplicateId;
mocks.assetJob.getForSearchDuplicatesJob.mockResolvedValue(hasEmbedding);
mocks.search.searchDuplicates.mockResolvedValue([{ assetId: hasDupe.id, distance: 0.01, duplicateId }]);
mocks.duplicateRepository.search.mockResolvedValue([{ assetId: hasDupe.id, distance: 0.01, duplicateId }]);
mocks.duplicateRepository.merge.mockResolvedValue();
const expectedAssetIds = [hasEmbedding.id];
const result = await sut.handleSearchDuplicates({ id: hasEmbedding.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(mocks.search.searchDuplicates).toHaveBeenCalledWith({
expect(mocks.duplicateRepository.search).toHaveBeenCalledWith({
assetId: hasEmbedding.id,
embedding: hasEmbedding.embedding,
maxDistance: 0.01,
type: hasEmbedding.type,
userIds: [hasEmbedding.ownerId],
});
expect(mocks.asset.updateDuplicates).toHaveBeenCalledWith({
expect(mocks.duplicateRepository.merge).toHaveBeenCalledWith({
assetIds: expectedAssetIds,
targetDuplicateId: duplicateId,
duplicateIds: [],
targetId: duplicateId,
sourceIds: [],
});
expect(mocks.asset.upsertJobStatus).toHaveBeenCalledWith(
...expectedAssetIds.map((assetId) => ({ assetId, duplicatesDetectedAt: expect.any(Date) })),
@@ -271,7 +273,7 @@ describe(SearchService.name, () => {
it('should remove duplicateId if no duplicates found and asset has duplicateId', async () => {
mocks.assetJob.getForSearchDuplicatesJob.mockResolvedValue(hasDupe);
mocks.search.searchDuplicates.mockResolvedValue([]);
mocks.duplicateRepository.search.mockResolvedValue([]);
const result = await sut.handleSearchDuplicates({ id: hasDupe.id });

View File

@@ -13,7 +13,7 @@ import { isDuplicateDetectionEnabled } from 'src/utils/misc';
@Injectable()
export class DuplicateService extends BaseService {
async getDuplicates(auth: AuthDto): Promise<DuplicateResponseDto[]> {
const duplicates = await this.assetRepository.getDuplicates(auth.user.id);
const duplicates = await this.duplicateRepository.getAll(auth.user.id);
return duplicates.map(({ duplicateId, assets }) => ({
duplicateId,
assets: assets.map((asset) => mapAsset(asset, { auth })),
@@ -74,7 +74,7 @@ export class DuplicateService extends BaseService {
return JobStatus.FAILED;
}
const duplicateAssets = await this.searchRepository.searchDuplicates({
const duplicateAssets = await this.duplicateRepository.search({
assetId: asset.id,
embedding: asset.embedding,
maxDistance: machineLearning.duplicateDetection.maxDistance,
@@ -117,7 +117,11 @@ export class DuplicateService extends BaseService {
.map((duplicate) => duplicate.assetId);
assetIdsToUpdate.push(asset.id);
await this.assetRepository.updateDuplicates({ targetDuplicateId, assetIds: assetIdsToUpdate, duplicateIds });
await this.duplicateRepository.merge({
targetId: targetDuplicateId,
assetIds: assetIdsToUpdate,
sourceIds: duplicateIds,
});
return assetIdsToUpdate;
}
}

View File

@@ -24,7 +24,6 @@ import {
VideoCodec,
VideoContainer,
} from 'src/enum';
import { UpsertFileOptions } from 'src/repositories/asset.repository';
import { BoundingBox } from 'src/repositories/machine-learning.repository';
import { BaseService } from 'src/services/base.service';
import {
@@ -42,6 +41,11 @@ import { getAssetFiles } from 'src/utils/asset.util';
import { BaseConfig, ThumbnailConfig } from 'src/utils/media';
import { mimeTypes } from 'src/utils/mime-types';
import { clamp, isFaceImportEnabled, isFacialRecognitionEnabled } from 'src/utils/misc';
interface UpsertFileOptions {
assetId: string;
type: AssetFileType;
path: string;
}
@Injectable()
export class MediaService extends BaseService {