feat(server): near-duplicate detection (#8228)

* duplicate detection job, entity, config

* queueing

* job panel, update api

* use embedding in db instead of fetching

* disable concurrency

* only queue visible assets

* handle multiple duplicateIds

* update concurrent queue check

* add provider

* add web placeholder, server endpoint, migration, various fixes

* update sql

* select embedding by default

* rename variable

* simplify

* remove separate entity, handle re-running with different threshold, set default back to 0.02

* fix tests

* add tests

* add index to entity

* formatting

* update asset mock

* fix `upsertJobStatus` signature

* update sql

* formatting

* default to 0.03

* optimize clustering

* use asset's `duplicateId` if present

* update sql

* update tests

* expose admin setting

* refactor

* formatting

* skip if ml is disabled

* debug trash e2e

* remove from web

* remove from sidebar

* test if ml is disabled

* update sql

* separate duplicate detection from clip in config, disable by default for now

* fix doc

* lower minimum `maxDistance`

* update api

* Add and Use Duplicate Detection Feature Flag (#9364)

* Add Duplicate Detection Flag

* Use Duplicate Detection Flag

* Attempt Fixes for Failing Checks

* lower minimum `maxDistance`

* fix tests

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>

* chore: fixes and additions after rebase

* chore: update api (remove new Role enum)

* fix: left join smart search so getAll works without machine learning

* test: trash e2e go back to checking length of assets is zero

* chore: regen api after rebase

* test: fix tests after rebase

* redundant join

---------

Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com>
Co-authored-by: Zack Pollard <zackpollard@ymail.com>
Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
Mert
2024-05-16 13:08:37 -04:00
committed by GitHub
parent 673e97e71d
commit 64636c0618
61 changed files with 1254 additions and 61 deletions

View File

@@ -1194,6 +1194,30 @@
]
}
},
"/asset/duplicates": {
"get": {
"operationId": "getAssetDuplicates",
"parameters": [],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"items": {
"$ref": "#/components/schemas/AssetResponseDto"
},
"type": "array"
}
}
},
"description": ""
}
},
"tags": [
"Asset"
]
}
},
"/asset/exist": {
"post": {
"description": "Checks if multiple assets exist on the server and returns all existing - used by background backup",
@@ -6812,6 +6836,9 @@
"backgroundTask": {
"$ref": "#/components/schemas/JobStatusDto"
},
"duplicateDetection": {
"$ref": "#/components/schemas/JobStatusDto"
},
"faceDetection": {
"$ref": "#/components/schemas/JobStatusDto"
},
@@ -6851,6 +6878,7 @@
},
"required": [
"backgroundTask",
"duplicateDetection",
"faceDetection",
"facialRecognition",
"library",
@@ -7873,6 +7901,24 @@
],
"type": "object"
},
"DuplicateDetectionConfig": {
"properties": {
"enabled": {
"type": "boolean"
},
"maxDistance": {
"format": "float",
"maximum": 0.1,
"minimum": 0.001,
"type": "number"
}
},
"required": [
"enabled",
"maxDistance"
],
"type": "object"
},
"EntityType": {
"enum": [
"ASSET",
@@ -8167,6 +8213,7 @@
"faceDetection",
"facialRecognition",
"smartSearch",
"duplicateDetection",
"backgroundTask",
"storageTemplateMigration",
"migration",
@@ -9379,6 +9426,9 @@
"configFile": {
"type": "boolean"
},
"duplicateDetection": {
"type": "boolean"
},
"email": {
"type": "boolean"
},
@@ -9415,6 +9465,7 @@
},
"required": [
"configFile",
"duplicateDetection",
"email",
"facialRecognition",
"map",
@@ -10247,6 +10298,9 @@
"clip": {
"$ref": "#/components/schemas/CLIPConfig"
},
"duplicateDetection": {
"$ref": "#/components/schemas/DuplicateDetectionConfig"
},
"enabled": {
"type": "boolean"
},
@@ -10259,6 +10313,7 @@
},
"required": [
"clip",
"duplicateDetection",
"enabled",
"facialRecognition",
"url"

View File

@@ -410,6 +410,7 @@ export type JobStatusDto = {
};
export type AllJobStatusResponseDto = {
backgroundTask: JobStatusDto;
duplicateDetection: JobStatusDto;
faceDetection: JobStatusDto;
facialRecognition: JobStatusDto;
library: JobStatusDto;
@@ -748,6 +749,7 @@ export type ServerConfigDto = {
};
export type ServerFeaturesDto = {
configFile: boolean;
duplicateDetection: boolean;
email: boolean;
facialRecognition: boolean;
map: boolean;
@@ -927,6 +929,10 @@ export type ClipConfig = {
modelName: string;
modelType?: ModelType;
};
/**
 * Duplicate-detection settings; used as the `duplicateDetection` member of
 * `SystemConfigMachineLearningDto`.
 */
export type DuplicateDetectionConfig = {
/** Flag toggling duplicate detection (presumably on when `true` — confirm against the server config handling). */
enabled: boolean;
/**
 * Distance threshold. The accompanying OpenAPI schema declares it as a float
 * bounded to the range 0.001–0.1.
 * NOTE(review): presumably the maximum embedding distance at which two assets
 * are still treated as duplicates — confirm against the server implementation.
 */
maxDistance: number;
};
export type RecognitionConfig = {
enabled: boolean;
maxDistance: number;
@@ -937,6 +943,7 @@ export type RecognitionConfig = {
};
export type SystemConfigMachineLearningDto = {
clip: ClipConfig;
duplicateDetection: DuplicateDetectionConfig;
enabled: boolean;
facialRecognition: RecognitionConfig;
url: string;
@@ -1399,6 +1406,14 @@ export function getAllUserAssetsByDeviceId({ deviceId }: {
...opts
}));
}
/**
 * Requests `/asset/duplicates` and expects a 200 response whose body is an
 * array of `AssetResponseDto`.
 *
 * @param opts Optional per-request options; they are spread into the options
 *   object handed to `oazapfts.fetchJson`.
 * @returns Whatever `oazapfts.ok` yields for the pending request
 *   (presumably the unwrapped response data — confirm against the oazapfts runtime).
 */
export function getAssetDuplicates(opts?: Oazapfts.RequestOpts) {
    const pendingResponse = oazapfts.fetchJson<{
        status: 200;
        data: AssetResponseDto[];
    }>("/asset/duplicates", { ...opts });
    return oazapfts.ok(pendingResponse);
}
/**
* Checks if multiple assets exist on the server and returns all existing - used by background backup
*/
@@ -2876,6 +2891,7 @@ export enum JobName {
FaceDetection = "faceDetection",
FacialRecognition = "facialRecognition",
SmartSearch = "smartSearch",
DuplicateDetection = "duplicateDetection",
BackgroundTask = "backgroundTask",
StorageTemplateMigration = "storageTemplateMigration",
Migration = "migration",