feat(ml): multilingual ocr (#23527)

* handle other languages in ml server

* add variants to model selector

* no need to override path

* unused import
This commit is contained in:
Mert
2025-11-06 12:58:41 -05:00
committed by GitHub
parent a4ae86ce29
commit 6913697ad1
5 changed files with 22 additions and 6 deletions

View File

@@ -78,6 +78,14 @@ _INSIGHTFACE_MODELS = {
_PADDLE_MODELS = {
"PP-OCRv5_server",
"PP-OCRv5_mobile",
"CH__PP-OCRv5_server",
"CH__PP-OCRv5_mobile",
"EL__PP-OCRv5_mobile",
"EN__PP-OCRv5_mobile",
"ESLAV__PP-OCRv5_mobile",
"KOREAN__PP-OCRv5_mobile",
"LATIN__PP-OCRv5_mobile",
"TH__PP-OCRv5_mobile",
}
SUPPORTED_PROVIDERS = [

View File

@@ -23,7 +23,7 @@ class TextDetector(InferenceModel):
identity = (ModelType.DETECTION, ModelTask.OCR)
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)
super().__init__(model_name.split("__")[-1], **model_kwargs, model_format=ModelFormat.ONNX)
self.max_resolution = 736
self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)

View File

@@ -25,6 +25,7 @@ class TextRecognizer(InferenceModel):
identity = (ModelType.RECOGNITION, ModelTask.OCR)
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
self.language = LangRec[model_name.split("__")[0]] if "__" in model_name else LangRec.CH
self.min_score = model_kwargs.get("minScore", 0.9)
self._empty: TextRecognitionOutput = {
"box": np.empty(0, dtype=np.float32),
@@ -41,7 +42,7 @@ class TextRecognizer(InferenceModel):
engine_type=EngineType.ONNXRUNTIME,
ocr_version=OCRVersion.PPOCRV5,
task_type=TaskType.REC,
lang_type=LangRec.CH,
lang_type=self.language,
model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
)
)
@@ -61,6 +62,7 @@ class TextRecognizer(InferenceModel):
session=session.session,
rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
rec_img_shape=(3, 48, 320),
lang_type=self.language,
)
)
return session

View File

@@ -20,8 +20,8 @@ class TextRecognitionOutput(TypedDict):
# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
class OcrOptions(dict[str, Any]):
def __init__(self, **options: Any) -> None:
def __init__(self, lang_type: LangRec | None = None, **options: Any) -> None:
super().__init__(**options)
self.engine_type = EngineType.ONNXRUNTIME
self.lang_type = LangRec.CH
self.lang_type = lang_type
self.font_path = None

View File

@@ -275,8 +275,14 @@
name="ocr-model"
bind:value={config.machineLearning.ocr.modelName}
options={[
{ value: 'PP-OCRv5_server', text: 'PP-OCRv5_server' },
{ value: 'PP-OCRv5_mobile', text: 'PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_server (Chinese, Japanese and English)', value: 'PP-OCRv5_server' },
{ text: 'PP-OCRv5_mobile (Chinese, Japanese and English)', value: 'PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_mobile (English-only)', value: 'EN__PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_mobile (Greek and English)', value: 'EL__PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_mobile (Korean and English)', value: 'KOREAN__PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_mobile (Latin script languages)', value: 'LATIN__PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_mobile (Russian, Belarusian, Ukrainian and English)', value: 'ESLAV__PP-OCRv5_mobile' },
{ text: 'PP-OCRv5_mobile (Thai and English)', value: 'TH__PP-OCRv5_mobile' },
]}
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
isEdited={config.machineLearning.ocr.modelName !== savedConfig.machineLearning.ocr.modelName}