feat(ml): ML on Rockchip NPUs (#15241)
@@ -64,6 +64,8 @@ class Settings(BaseSettings):
     ann: bool = True
     ann_fp16_turbo: bool = False
     ann_tuning_level: int = 2
+    rknn: bool = True
+    rknn_threads: int = 1
     preload: PreloadModelData | None = None
     max_batch_size: MaxBatchSize | None = None
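For context, a minimal sketch of how these two new settings could be toggled from the environment, assuming the service loads `Settings` via pydantic-settings with a `MACHINE_LEARNING_` env prefix (the prefix and surrounding wiring are assumptions, not shown in this hunk):

```python
# Hypothetical sketch: toggling the new RKNN settings via environment
# variables. The MACHINE_LEARNING_ prefix is an assumption for illustration.
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_")

    rknn: bool = True      # enable the RKNN execution path
    rknn_threads: int = 1  # thread pool size for RknnPoolExecutor


os.environ["MACHINE_LEARNING_RKNN_THREADS"] = "3"
print(Settings().rknn_threads)  # -> 3
```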
@@ -136,6 +136,12 @@ def ann_session() -> Iterator[mock.Mock]:
         yield mocked


+@pytest.fixture(scope="function")
+def rknn_session() -> Iterator[mock.Mock]:
+    with mock.patch("app.sessions.rknn.RknnPoolExecutor") as mocked:
+        yield mocked
+
+
 @pytest.fixture(scope="function")
 def rmtree() -> Iterator[mock.Mock]:
     with mock.patch("app.models.base.rmtree", autospec=True) as mocked:
@@ -226,9 +226,9 @@ async def load(model: InferenceModel) -> InferenceModel:
     except FileNotFoundError as e:
         if model.model_format == ModelFormat.ONNX:
             raise e
-        log.exception(e)
         log.warning(
-            f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it."
+            f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
+            exc_info=e,
         )
         model.model_format = ModelFormat.ONNX
         model.load()
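The net effect of this hunk: a missing hardware-specific model file no longer produces a duplicate `log.exception` plus a bare warning; the traceback rides along on the single warning via `exc_info`, and the loader retries in ONNX. A standalone sketch of that flow, using an illustrative stand-in rather than the real `InferenceModel`:

```python
# Minimal sketch of the fallback above: if a hardware-specific model file is
# missing, warn (keeping the traceback via exc_info) and retry as ONNX.
# FakeModel is an illustrative stand-in, not the real InferenceModel.
import logging

logging.basicConfig(level=logging.WARNING)
log = logging.getLogger(__name__)


class FakeModel:
    def __init__(self) -> None:
        self.model_format = "rknn"

    def load(self) -> None:
        if self.model_format != "onnx":
            raise FileNotFoundError(f"no model.{self.model_format} on disk")


def load(model: FakeModel) -> FakeModel:
    try:
        model.load()
    except FileNotFoundError as e:
        if model.model_format == "onnx":
            raise
        log.warning(
            f"{model.model_format.upper()} is available, but the model does not support it.",
            exc_info=e,  # one warning carries the traceback; no separate log.exception
        )
        model.model_format = "onnx"
        model.load()
    return model


load(FakeModel())  # warns once, then succeeds via the ONNX path
```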
@@ -8,6 +8,7 @@ from typing import Any, ClassVar
 from huggingface_hub import snapshot_download

 import ann.ann
+import app.sessions.rknn as rknn
 from app.sessions.ort import OrtSession

 from ..config import clean_name, log, settings
@@ -66,12 +67,17 @@ class InferenceModel(ABC):
         pass

     def _download(self) -> None:
-        ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
+        ignored_patterns: dict[ModelFormat, list[str]] = {
+            ModelFormat.ONNX: ["*.armnn", "*.rknn"],
+            ModelFormat.ARMNN: ["*.rknn"],
+            ModelFormat.RKNN: ["*.armnn"],
+        }
+
         snapshot_download(
             f"immich-app/{clean_name(self.model_name)}",
             cache_dir=self.cache_dir,
             local_dir=self.cache_dir,
-            ignore_patterns=ignore_patterns,
+            ignore_patterns=ignored_patterns.get(self.model_format, []),
         )

     def _load(self) -> ModelSession:
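A quick illustration of the new download filter: each preferred format maps to the artifact patterns it should skip, and unknown formats fall back to downloading everything. Note that the `ModelFormat.RKNN` entry only excludes `*.armnn`, so the ONNX weights still come along:

```python
# Plain-dict sketch of the filter passed to huggingface_hub.snapshot_download;
# string keys stand in for the ModelFormat enum members.
ignored_patterns = {
    "onnx": ["*.armnn", "*.rknn"],
    "armnn": ["*.rknn"],
    "rknn": ["*.armnn"],
}

assert ignored_patterns.get("onnx", []) == ["*.armnn", "*.rknn"]
assert ignored_patterns.get("rknn", []) == ["*.armnn"]      # .onnx files still download
assert ignored_patterns.get("unknown-format", []) == []     # fall back to everything
```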
@@ -108,17 +114,25 @@ class InferenceModel(ABC):
                 session: ModelSession = AnnSession(model_path)
             case ".onnx":
                 session = OrtSession(model_path)
+            case ".rknn":
+                session = rknn.RknnSession(model_path)
             case _:
                 raise ValueError(f"Unsupported model file type: {model_path.suffix}")
         return session

+    def model_path_for_format(self, model_format: ModelFormat) -> Path:
+        model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
+        if model_path_prefix:
+            return self.model_dir / model_path_prefix / f"model.{model_format}"
+        return self.model_dir / f"model.{model_format}"
+
     @property
     def model_dir(self) -> Path:
         return self.cache_dir / self.model_type.value

     @property
     def model_path(self) -> Path:
-        return self.model_dir / f"model.{self.model_format}"
+        return self.model_path_for_format(self.model_format)

     @property
     def model_task(self) -> ModelTask:
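To make the path logic concrete, here is a sketch of the resulting cache layout, assuming a detected rk3588 SoC so that `rknn.model_prefix` is `rknpu/rk3588` (the cache directory itself is a made-up example):

```python
# Sketch of model_path_for_format's output for RKNN vs. ONNX; the model_dir
# value is hypothetical, the rknpu/rk3588 prefix follows the code above.
from pathlib import Path

model_dir = Path("/cache/clip/ViT-B-32__openai/textual")  # hypothetical model_dir

prefix = Path("rknpu") / "rk3588"
print(model_dir / prefix / "model.rknn")  # .../textual/rknpu/rk3588/model.rknn
print(model_dir / "model.onnx")           # .../textual/model.onnx
```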
@@ -155,4 +169,9 @@ class InferenceModel(ABC):

     @property
     def _model_format_default(self) -> ModelFormat:
-        return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
+        if rknn.is_available:
+            return ModelFormat.RKNN
+        elif ann.ann.is_available and settings.ann:
+            return ModelFormat.ARMNN
+        else:
+            return ModelFormat.ONNX
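The default-format selection reduces to a simple priority chain, shown here as a standalone function where the two flags stand in for `rknn.is_available` and `ann.ann.is_available` combined with `settings.ann`:

```python
# RKNN wins when the NPU runtime is usable, then ARM NN, then the ONNX
# fallback. Boolean parameters are illustrative stand-ins.
def default_format(rknn_ok: bool, ann_ok: bool) -> str:
    if rknn_ok:
        return "rknn"
    elif ann_ok:
        return "armnn"
    return "onnx"


assert default_format(True, True) == "rknn"  # the NPU takes priority
assert default_format(False, True) == "armnn"
assert default_format(False, False) == "onnx"
```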
@@ -44,6 +44,18 @@ _OPENCLIP_MODELS = {
     "nllb-clip-base-siglip__v1",
     "nllb-clip-large-siglip__mrl",
     "nllb-clip-large-siglip__v1",
+    "ViT-B-16-SigLIP2__webli",
+    "ViT-B-32-SigLIP2-256__webli",
+    "ViT-L-16-SigLIP2-256__webli",
+    "ViT-L-16-SigLIP2-384__webli",
+    "ViT-L-16-SigLIP2-512__webli",
+    "ViT-SO400M-14-SigLIP2-378__webli",
+    "ViT-SO400M-14-SigLIP2__webli",
+    "ViT-SO400M-16-SigLIP2-256__webli",
+    "ViT-SO400M-16-SigLIP2-384__webli",
+    "ViT-SO400M-16-SigLIP2-512__webli",
+    "ViT-gopt-16-SigLIP2-256__webli",
+    "ViT-gopt-16-SigLIP2-384__webli",
 }
@@ -65,6 +77,9 @@ _INSIGHTFACE_MODELS = {

 SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]

+RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
+RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
+

 def get_model_source(model_name: str) -> ModelSource | None:
     cleaned_name = clean_name(model_name)
@@ -31,7 +31,7 @@ class FaceRecognizer(InferenceModel):
         self._add_batch_axis(self.model_path)
         session = self._make_session(self.model_path)
         self.model = ArcFaceONNX(
-            self.model_path.with_suffix(".onnx").as_posix(),
+            self.model_path_for_format(ModelFormat.ONNX).as_posix(),
             session=session,
         )
         return session
@@ -35,6 +35,7 @@ class ModelType(StrEnum):
 class ModelFormat(StrEnum):
     ARMNN = "armnn"
     ONNX = "onnx"
+    RKNN = "rknn"


 class ModelSource(StrEnum):
machine-learning/app/sessions/rknn/__init__.py (new file, 76 lines)
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, NamedTuple
+
+import numpy as np
+from numpy.typing import NDArray
+
+from app.config import log, settings
+from app.schemas import SessionNode
+
+from .rknnpool import RknnPoolExecutor, is_available, soc_name
+
+is_available = is_available and settings.rknn
+model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None
+
+
+def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
+    outputs: list[NDArray[np.float32]] = rknn_lite.inference(inputs=input, data_format="nchw")
+    return outputs
+
+
+input_output_mapping: dict[str, dict[str, Any]] = {
+    "detection": {
+        "input": {"norm_tensor:0": (1, 3, 640, 640)},
+        "output": {
+            "norm_tensor:1": (12800, 1),
+            "norm_tensor:2": (3200, 1),
+            "norm_tensor:3": (800, 1),
+            "norm_tensor:4": (12800, 4),
+            "norm_tensor:5": (3200, 4),
+            "norm_tensor:6": (800, 4),
+            "norm_tensor:7": (12800, 10),
+            "norm_tensor:8": (3200, 10),
+            "norm_tensor:9": (800, 10),
+        },
+    },
+    "recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
+}
+
+
+class RknnSession:
+    def __init__(self, model_path: Path) -> None:
+        self.model_type = "detection" if "detection" in model_path.parts else "recognition"
+        self.tpe = settings.rknn_threads
+
+        log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
+        self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
+        log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")
+
+    def get_inputs(self) -> list[SessionNode]:
+        return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["input"].items()]
+
+    def get_outputs(self) -> list[SessionNode]:
+        return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["output"].items()]
+
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
+        run_options: Any = None,
+    ) -> list[NDArray[np.float32]]:
+        input_data: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
+        self.rknnpool.put(input_data)
+        res = self.rknnpool.get()
+        if res is None:
+            raise RuntimeError("RKNN inference failed!")
+        return res
+
+
+class RknnNode(NamedTuple):
+    name: str | None
+    shape: tuple[int, ...]
+
+
+__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]
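Hypothetical usage of the session on a supported board, assuming rknn-toolkit-lite2 is installed and a converted detection model already sits at the illustrated path (the path and input shape are examples chosen to match the mapping above):

```python
# Only runs on a supported Rockchip device; the model path is illustrative.
from pathlib import Path

import numpy as np

from app.sessions.rknn import RknnSession

session = RknnSession(Path("/cache/facial-recognition/buffalo_l/detection/rknpu/rk3588/model.rknn"))
image = np.zeros((1, 3, 640, 640), dtype=np.float32)  # NCHW, per the detection input above
outputs = session.run(None, {"norm_tensor:0": image})
print([o.shape for o in outputs])
```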
machine-learning/app/sessions/rknn/rknnpool.py (new file, 91 lines)
@@ -0,0 +1,91 @@
+# This code is from leafqycc/rknn-multi-threaded
+# Following Apache License 2.0
+
+import logging
+from concurrent.futures import Future, ThreadPoolExecutor
+from pathlib import Path
+from queue import Queue
+from typing import Callable
+
+import numpy as np
+from numpy.typing import NDArray
+
+from app.config import log
+from app.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS
+
+
+def get_soc(device_tree_path: Path | str) -> str | None:
+    try:
+        with Path(device_tree_path).open() as f:
+            device_compatible_str = f.read()
+            for soc in RKNN_SUPPORTED_SOCS:
+                if soc in device_compatible_str:
+                    return soc
+            log.warning("Device is not supported for RKNN")
+    except OSError as e:
+        log.warning(f"Could not read {device_tree_path}. Reason: {e}")
+    return None
+
+
+soc_name = None
+is_available = False
+try:
+    from rknnlite.api import RKNNLite
+
+    soc_name = get_soc("/proc/device-tree/compatible")
+    is_available = soc_name is not None
+except ImportError:
+    log.debug("RKNN is not available")
+
+
+def init_rknn(model_path: str) -> "RKNNLite":
+    if not is_available:
+        raise RuntimeError("rknn is not available!")
+    rknn_lite = RKNNLite()
+    rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
+    ret = rknn_lite.load_rknn(model_path)
+    if ret != 0:
+        raise RuntimeError("Failed to load RKNN model")
+
+    if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
+        ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
+    else:
+        ret = rknn_lite.init_runtime()  # Please do not set this parameter on other platforms.
+
+    if ret != 0:
+        raise RuntimeError("Failed to initialize RKNN runtime environment")
+
+    return rknn_lite
+
+
+class RknnPoolExecutor:
+    def __init__(
+        self,
+        model_path: str,
+        tpes: int,
+        func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
+    ) -> None:
+        self.tpes = tpes
+        self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
+        self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
+        self.pool = ThreadPoolExecutor(max_workers=tpes)
+        self.func = func
+        self.num = 0
+
+    def put(self, inputs: list[NDArray[np.float32]]) -> None:
+        self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
+        self.num += 1
+
+    def get(self) -> list[NDArray[np.float32]] | None:
+        if self.queue.empty():
+            return None
+        fut = self.queue.get()
+        return fut.result()
+
+    def release(self) -> None:
+        self.pool.shutdown()
+        for rknn_lite in self.rknn_pool:
+            rknn_lite.release()
+
+    def __del__(self) -> None:
+        self.release()
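A sketch of how the pool pipelines work across NPU runtimes: `put()` submits to the `RKNNLite` instances round-robin and `get()` blocks on the oldest pending future, so up to `tpes` inferences stay in flight at once. The model path is a placeholder, and this only runs on a supported Rockchip device:

```python
# Hypothetical round-robin usage of RknnPoolExecutor; run_inference is the
# helper defined in app/sessions/rknn/__init__.py above.
import numpy as np

from app.sessions.rknn import run_inference
from app.sessions.rknn.rknnpool import RknnPoolExecutor

pool = RknnPoolExecutor(model_path="/path/to/model.rknn", tpes=2, func=run_inference)
batch = [np.zeros((1, 3, 112, 112), dtype=np.float32)]
for _ in range(4):
    pool.put(batch)                       # submissions alternate between the two runtimes
results = [pool.get() for _ in range(4)]  # FIFO order matches submission order
pool.release()
```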
@@ -25,6 +25,7 @@ from app.models.facial_recognition.detection import FaceDetector
 from app.models.facial_recognition.recognition import FaceRecognizer
 from app.sessions.ann import AnnSession
 from app.sessions.ort import OrtSession
+from app.sessions.rknn import RknnSession, run_inference

 from .config import Settings, settings
 from .models.base import InferenceModel
@@ -69,6 +70,14 @@ class TestBase:

         assert encoder.model_format == ModelFormat.ARMNN

+    def test_sets_default_model_format_to_rknn_if_available(self, mocker: MockerFixture) -> None:
+        mocker.patch.object(settings, "rknn", True)
+        mocker.patch("app.sessions.rknn.is_available", True)
+
+        encoder = OpenClipTextualEncoder("ViT-B-32__openai")
+
+        assert encoder.model_format == ModelFormat.RKNN
+
     def test_casts_cache_dir_string_to_path(self) -> None:
         cache_dir = "/test_cache"
         encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir)
@@ -125,7 +134,7 @@ class TestBase:
             "immich-app/ViT-B-32__openai",
             cache_dir=encoder.cache_dir,
             local_dir=encoder.cache_dir,
-            ignore_patterns=["*.armnn"],
+            ignore_patterns=["*.armnn", "*.rknn"],
         )

     def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
@@ -136,7 +145,18 @@ class TestBase:
             "immich-app/ViT-B-32__openai",
             cache_dir=encoder.cache_dir,
             local_dir=encoder.cache_dir,
-            ignore_patterns=[],
+            ignore_patterns=["*.rknn"],
         )

+    def test_download_downloads_rknn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
+        encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.RKNN)
+        encoder.download()
+
+        snapshot_download.assert_called_once_with(
+            "immich-app/ViT-B-32__openai",
+            cache_dir=encoder.cache_dir,
+            local_dir=encoder.cache_dir,
+            ignore_patterns=["*.armnn"],
+        )
+
     def test_throws_exception_if_model_path_does_not_exist(
@@ -328,6 +348,33 @@ class TestAnnSession:
         np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])


+class TestRknnSession:
+    def test_creates_rknn_session(self, rknn_session: mock.Mock, info: mock.Mock, mocker: MockerFixture) -> None:
+        model_path = mock.MagicMock(spec=Path)
+        tpe = 1
+        mocker.patch("app.sessions.rknn.soc_name", "rk3566")
+        mocker.patch("app.sessions.rknn.is_available", True)
+        RknnSession(model_path)
+
+        rknn_session.assert_called_once_with(model_path=model_path.as_posix(), tpes=tpe, func=run_inference)
+
+        info.assert_has_calls([mock.call(f"Loaded RKNN model from {model_path} with {tpe} threads.")])
+
+    def test_run_rknn(self, rknn_session: mock.Mock, mocker: MockerFixture) -> None:
+        rknn_session.return_value.load.return_value = 123
+        np_spy = mocker.spy(np, "ascontiguousarray")
+        mocker.patch("app.sessions.rknn.soc_name", "rk3566")
+        session = RknnSession(Path("ViT-B-32__openai"))
+        [input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)]
+        input_feed = {"input.1": input1, "input.2": input2}
+
+        session.run(None, input_feed)
+
+        rknn_session.return_value.put.assert_called_once_with([input1, input2])
+        assert np_spy.call_count == 2
+        np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
+
+
 class TestCLIP:
     embedding = np.random.rand(512).astype(np.float32)
     cache_dir = Path("test_cache")
@@ -829,9 +876,7 @@ class TestLoad:
         mock_model.clear_cache.assert_not_called()
         mock_model.load.assert_not_called()

-    async def test_falls_back_to_onnx_if_other_format_does_not_exist(
-        self, exception: mock.Mock, warning: mock.Mock
-    ) -> None:
+    async def test_falls_back_to_onnx_if_other_format_does_not_exist(self, warning: mock.Mock) -> None:
        mock_model = mock.Mock(spec=InferenceModel)
        mock_model.model_name = "test_model_name"
        mock_model.model_type = ModelType.VISUAL
@@ -846,8 +891,9 @@ class TestLoad:

         mock_model.clear_cache.assert_not_called()
         assert mock_model.load.call_count == 2
-        exception.assert_called_once_with(error)
-        warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.")
+        warning.assert_called_once_with(
+            "ARMNN is available, but model 'test_model_name' does not support it.", exc_info=error
+        )
         mock_model.model_format = ModelFormat.ONNX