mirror of
https://github.com/immich-app/immich.git
synced 2025-12-06 09:13:13 +03:00
feat(ml): multilingual ocr (#23527)
* handle other languages in ml server
* add variants to model selector
* no need to override path
* unused import
This commit is contained in:
@@ -78,6 +78,14 @@ _INSIGHTFACE_MODELS = {
|
||||
# Known PaddleOCR (PP-OCRv5) model names.
# A name may carry a `<LANG>__` prefix (e.g. `EN__`): the recognizer reads the
# part before `__` as its language, and the detector uses only the part after
# the last `__` as the underlying model name. Unprefixed names carry no
# language tag.
_PADDLE_MODELS = {
    "PP-OCRv5_server",
    "PP-OCRv5_mobile",
    "CH__PP-OCRv5_server",
    "CH__PP-OCRv5_mobile",
    "EL__PP-OCRv5_mobile",
    "EN__PP-OCRv5_mobile",
    "ESLAV__PP-OCRv5_mobile",
    "KOREAN__PP-OCRv5_mobile",
    "LATIN__PP-OCRv5_mobile",
    "TH__PP-OCRv5_mobile",
}
|
||||
|
||||
SUPPORTED_PROVIDERS = [
|
||||
|
||||
@@ -23,7 +23,7 @@ class TextDetector(InferenceModel):
|
||||
identity = (ModelType.DETECTION, ModelTask.OCR)
|
||||
|
||||
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
||||
super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)
|
||||
super().__init__(model_name.split("__")[-1], **model_kwargs, model_format=ModelFormat.ONNX)
|
||||
self.max_resolution = 736
|
||||
self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
|
||||
self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)
|
||||
|
||||
@@ -25,6 +25,7 @@ class TextRecognizer(InferenceModel):
|
||||
identity = (ModelType.RECOGNITION, ModelTask.OCR)
|
||||
|
||||
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
|
||||
self.language = LangRec[model_name.split("__")[0]] if "__" in model_name else LangRec.CH
|
||||
self.min_score = model_kwargs.get("minScore", 0.9)
|
||||
self._empty: TextRecognitionOutput = {
|
||||
"box": np.empty(0, dtype=np.float32),
|
||||
@@ -41,7 +42,7 @@ class TextRecognizer(InferenceModel):
|
||||
engine_type=EngineType.ONNXRUNTIME,
|
||||
ocr_version=OCRVersion.PPOCRV5,
|
||||
task_type=TaskType.REC,
|
||||
lang_type=LangRec.CH,
|
||||
lang_type=self.language,
|
||||
model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
|
||||
)
|
||||
)
|
||||
@@ -61,6 +62,7 @@ class TextRecognizer(InferenceModel):
|
||||
session=session.session,
|
||||
rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
|
||||
rec_img_shape=(3, 48, 320),
|
||||
lang_type=self.language,
|
||||
)
|
||||
)
|
||||
return session
|
||||
|
||||
@@ -20,8 +20,8 @@ class TextRecognitionOutput(TypedDict):
|
||||
|
||||
# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
|
||||
class OcrOptions(dict[str, Any]):
    """Option mapping that also exposes a few settings as attributes.

    Keyword options are stored as ordinary dict items, while `engine_type`,
    `lang_type` and `font_path` are set as instance attributes.
    """

    def __init__(self, lang_type: LangRec | None = None, **options: Any) -> None:
        """Store `options` as dict items and set the attribute-style fields.

        Args:
            lang_type: Language exposed as `self.lang_type`. Defaults to
                None, in which case the attribute is set to None.
            **options: Arbitrary options stored as dict entries.
        """
        super().__init__(**options)
        self.engine_type = EngineType.ONNXRUNTIME
        # Taken from the caller; the superseded `LangRec.CH` assignment is dropped.
        self.lang_type = lang_type
        self.font_path = None
|
||||
|
||||
@@ -275,8 +275,14 @@
|
||||
name="ocr-model"
|
||||
bind:value={config.machineLearning.ocr.modelName}
|
||||
options={[
|
||||
{ value: 'PP-OCRv5_server', text: 'PP-OCRv5_server' },
|
||||
{ value: 'PP-OCRv5_mobile', text: 'PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_server (Chinese, Japanese and English)', value: 'PP-OCRv5_server' },
|
||||
{ text: 'PP-OCRv5_mobile (Chinese, Japanese and English)', value: 'PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_mobile (English-only)', value: 'EN__PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_mobile (Greek and English)', value: 'EL__PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_mobile (Korean and English)', value: 'KOREAN__PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_mobile (Latin script languages)', value: 'LATIN__PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_mobile (Russian, Belarusian, Ukrainian and English)', value: 'ESLAV__PP-OCRv5_mobile' },
|
||||
{ text: 'PP-OCRv5_mobile (Thai and English)', value: 'TH__PP-OCRv5_mobile' },
|
||||
]}
|
||||
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
|
||||
isEdited={config.machineLearning.ocr.modelName !== savedConfig.machineLearning.ocr.modelName}
|
||||
|
||||
Reference in New Issue
Block a user