Mirror of https://github.com/immich-app/immich.git, synced 2025-12-19 01:11:07 +03:00
feat(ml): composable ml (#9973)
* modularize model classes
* various fixes
* expose port
* change response
* round coordinates
* simplify preload
* update server
* simplify interface
* update tests
* composable endpoint
* cleanup, fixes, remove unnecessary interface, support text input
* ew camelcase
* update server, server fixes, fix typing
* ml fixes, update locustfile
* cleaner response
* better repo response
* update tests, formatting and typing, rename
* undo compose change
* linting, fix type, actually fix typing
* stricter typing, fix detection-only response, no need for defaultdict
* update spec file, update api, linting
* update e2e
* unnecessary dimension
* remove commented code
* remove duplicate code
* remove unused imports
* add batch dim
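The heart of the change is the new composable request shape: a top-level task key with per-model-type options nested under it. As a minimal sketch (not part of the commit) of how the types in the diff below compose, assuming an import path matching the diffed file and purely illustrative model names:

import { ModelTask, ModelType } from 'src/interfaces/machine-learning.interface';
import type {
  ClipTextualRequest,
  FacialRecognitionRequest,
  MachineLearningRequest,
} from 'src/interfaces/machine-learning.interface';

// A CLIP text request: task key -> model type key -> model options.
const textRequest: ClipTextualRequest = {
  [ModelTask.SEARCH]: { [ModelType.TEXTUAL]: { modelName: 'ViT-B-32__openai' } },
};

// Facial recognition composes two model types (detection and recognition)
// under a single task key; minScore comes from FaceDetectionOptions.
const faceRequest: FacialRecognitionRequest = {
  [ModelTask.FACIAL_RECOGNITION]: {
    [ModelType.DETECTION]: { modelName: 'buffalo_l', minScore: 0.7 },
    [ModelType.RECOGNITION]: { modelName: 'buffalo_l' },
  },
};

// Both shapes are valid members of the MachineLearningRequest union,
// which is what lets one endpoint serve every task.
const requests: MachineLearningRequest[] = [textRequest, faceRequest];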
@@ -1,15 +1,5 @@
-import { CLIPConfig, RecognitionConfig } from 'src/dtos/model-config.dto';
-
 export const IMachineLearningRepository = 'IMachineLearningRepository';
 
-export interface VisionModelInput {
-  imagePath: string;
-}
-
-export interface TextModelInput {
-  text: string;
-}
-
 export interface BoundingBox {
   x1: number;
   y1: number;
@@ -17,26 +7,51 @@ export interface BoundingBox {
   y2: number;
 }
 
-export interface DetectFaceResult {
-  imageWidth: number;
-  imageHeight: number;
-  boundingBox: BoundingBox;
-  score: number;
-  embedding: number[];
+export enum ModelTask {
+  FACIAL_RECOGNITION = 'facial-recognition',
+  SEARCH = 'clip',
 }
 
 export enum ModelType {
-  FACIAL_RECOGNITION = 'facial-recognition',
-  CLIP = 'clip',
+  DETECTION = 'detection',
+  PIPELINE = 'pipeline',
+  RECOGNITION = 'recognition',
+  TEXTUAL = 'textual',
+  VISUAL = 'visual',
 }
 
-export enum CLIPMode {
-  VISION = 'vision',
-  TEXT = 'text',
-}
+export type ModelPayload = { imagePath: string } | { text: string };
+
+type ModelOptions = { modelName: string };
+
+export type FaceDetectionOptions = ModelOptions & { minScore: number };
+
+type VisualResponse = { imageHeight: number; imageWidth: number };
+export type ClipVisualRequest = { [ModelTask.SEARCH]: { [ModelType.VISUAL]: ModelOptions } };
+export type ClipVisualResponse = { [ModelTask.SEARCH]: number[] } & VisualResponse;
+
+export type ClipTextualRequest = { [ModelTask.SEARCH]: { [ModelType.TEXTUAL]: ModelOptions } };
+export type ClipTextualResponse = { [ModelTask.SEARCH]: number[] };
+
+export type FacialRecognitionRequest = {
+  [ModelTask.FACIAL_RECOGNITION]: {
+    [ModelType.DETECTION]: FaceDetectionOptions;
+    [ModelType.RECOGNITION]: ModelOptions;
+  };
+};
+
+export interface Face {
+  boundingBox: BoundingBox;
+  embedding: number[];
+  score: number;
+}
+
+export type FacialRecognitionResponse = { [ModelTask.FACIAL_RECOGNITION]: Face[] } & VisualResponse;
+export type DetectedFaces = { faces: Face[] } & VisualResponse;
+export type MachineLearningRequest = ClipVisualRequest | ClipTextualRequest | FacialRecognitionRequest;
 
 export interface IMachineLearningRepository {
-  encodeImage(url: string, input: VisionModelInput, config: CLIPConfig): Promise<number[]>;
-  encodeText(url: string, input: TextModelInput, config: CLIPConfig): Promise<number[]>;
-  detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]>;
+  encodeImage(url: string, imagePath: string, config: ModelOptions): Promise<number[]>;
+  encodeText(url: string, text: string, config: ModelOptions): Promise<number[]>;
+  detectFaces(url: string, imagePath: string, config: FaceDetectionOptions): Promise<DetectedFaces>;
 }
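A hedged sketch of a call site for the updated repository methods. The signatures match the new IMachineLearningRepository above; the injected repository, service URL, asset path, model names, and minScore value are all illustrative assumptions, not defaults from the commit.

import type { IMachineLearningRepository } from 'src/interfaces/machine-learning.interface';

// `repository` would be an injected implementation registered under the
// IMachineLearningRepository token; the URL points at a hypothetical ML service.
async function example(repository: IMachineLearningRepository): Promise<void> {
  const url = 'http://immich-machine-learning:3003';

  // CLIP text embedding: the payload and options are passed directly rather
  // than through the removed TextModelInput/CLIPConfig wrappers.
  const embedding: number[] = await repository.encodeText(url, 'sunset over the ocean', {
    modelName: 'ViT-B-32__openai',
  });

  // Face detection: DetectedFaces bundles the faces with the image dimensions,
  // replacing the per-face imageWidth/imageHeight of the old DetectFaceResult[].
  const { faces, imageWidth, imageHeight } = await repository.detectFaces(url, '/data/upload/asset.jpg', {
    modelName: 'buffalo_l',
    minScore: 0.7,
  });

  console.log(embedding.length, faces.length, imageWidth, imageHeight);
}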