feat(ml): ML on Rockchip NPUs (#15241)

This commit is contained in:
Yoni Yang
2025-03-18 00:04:08 +08:00
committed by GitHub
parent 1e184a70f1
commit 14c3b99c0f
43 changed files with 2417 additions and 4726 deletions

View File

@@ -64,6 +64,8 @@ class Settings(BaseSettings):
ann: bool = True
ann_fp16_turbo: bool = False
ann_tuning_level: int = 2
rknn: bool = True
rknn_threads: int = 1
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None

View File

@@ -136,6 +136,12 @@ def ann_session() -> Iterator[mock.Mock]:
yield mocked
@pytest.fixture(scope="function")
def rknn_session() -> Iterator[mock.Mock]:
with mock.patch("app.sessions.rknn.RknnPoolExecutor") as mocked:
yield mocked
@pytest.fixture(scope="function")
def rmtree() -> Iterator[mock.Mock]:
with mock.patch("app.models.base.rmtree", autospec=True) as mocked:

View File

@@ -226,9 +226,9 @@ async def load(model: InferenceModel) -> InferenceModel:
except FileNotFoundError as e:
if model.model_format == ModelFormat.ONNX:
raise e
log.exception(e)
log.warning(
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it."
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
exc_info=e,
)
model.model_format = ModelFormat.ONNX
model.load()

View File

@@ -8,6 +8,7 @@ from typing import Any, ClassVar
from huggingface_hub import snapshot_download
import ann.ann
import app.sessions.rknn as rknn
from app.sessions.ort import OrtSession
from ..config import clean_name, log, settings
@@ -66,12 +67,17 @@ class InferenceModel(ABC):
pass
def _download(self) -> None:
ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
ignored_patterns: dict[ModelFormat, list[str]] = {
ModelFormat.ONNX: ["*.armnn", "*.rknn"],
ModelFormat.ARMNN: ["*.rknn"],
ModelFormat.RKNN: ["*.armnn"],
}
snapshot_download(
f"immich-app/{clean_name(self.model_name)}",
cache_dir=self.cache_dir,
local_dir=self.cache_dir,
ignore_patterns=ignore_patterns,
ignore_patterns=ignored_patterns.get(self.model_format, []),
)
def _load(self) -> ModelSession:
@@ -108,17 +114,25 @@ class InferenceModel(ABC):
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = OrtSession(model_path)
case ".rknn":
session = rknn.RknnSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
def model_path_for_format(self, model_format: ModelFormat) -> Path:
model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
if model_path_prefix:
return self.model_dir / model_path_prefix / f"model.{model_format}"
return self.model_dir / f"model.{model_format}"
@property
def model_dir(self) -> Path:
return self.cache_dir / self.model_type.value
@property
def model_path(self) -> Path:
return self.model_dir / f"model.{self.model_format}"
return self.model_path_for_format(self.model_format)
@property
def model_task(self) -> ModelTask:
@@ -155,4 +169,9 @@ class InferenceModel(ABC):
@property
def _model_format_default(self) -> ModelFormat:
return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
if rknn.is_available:
return ModelFormat.RKNN
elif ann.ann.is_available and settings.ann:
return ModelFormat.ARMNN
else:
return ModelFormat.ONNX

View File

@@ -44,6 +44,18 @@ _OPENCLIP_MODELS = {
"nllb-clip-base-siglip__v1",
"nllb-clip-large-siglip__mrl",
"nllb-clip-large-siglip__v1",
"ViT-B-16-SigLIP2__webli",
"ViT-B-32-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-384__webli",
"ViT-L-16-SigLIP2-512__webli",
"ViT-SO400M-14-SigLIP2-378__webli",
"ViT-SO400M-14-SigLIP2__webli",
"ViT-SO400M-16-SigLIP2-256__webli",
"ViT-SO400M-16-SigLIP2-384__webli",
"ViT-SO400M-16-SigLIP2-512__webli",
"ViT-gopt-16-SigLIP2-256__webli",
"ViT-gopt-16-SigLIP2-384__webli",
}
@@ -65,6 +77,9 @@ _INSIGHTFACE_MODELS = {
SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]
RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
def get_model_source(model_name: str) -> ModelSource | None:
cleaned_name = clean_name(model_name)

View File

@@ -31,7 +31,7 @@ class FaceRecognizer(InferenceModel):
self._add_batch_axis(self.model_path)
session = self._make_session(self.model_path)
self.model = ArcFaceONNX(
self.model_path.with_suffix(".onnx").as_posix(),
self.model_path_for_format(ModelFormat.ONNX).as_posix(),
session=session,
)
return session

View File

@@ -35,6 +35,7 @@ class ModelType(StrEnum):
class ModelFormat(StrEnum):
ARMNN = "armnn"
ONNX = "onnx"
RKNN = "rknn"
class ModelSource(StrEnum):

View File

@@ -0,0 +1,76 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
import numpy as np
from numpy.typing import NDArray
from app.config import log, settings
from app.schemas import SessionNode
from .rknnpool import RknnPoolExecutor, is_available, soc_name
is_available = is_available and settings.rknn
model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None
def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
outputs: list[NDArray[np.float32]] = rknn_lite.inference(inputs=input, data_format="nchw")
return outputs
input_output_mapping: dict[str, dict[str, Any]] = {
"detection": {
"input": {"norm_tensor:0": (1, 3, 640, 640)},
"output": {
"norm_tensor:1": (12800, 1),
"norm_tensor:2": (3200, 1),
"norm_tensor:3": (800, 1),
"norm_tensor:4": (12800, 4),
"norm_tensor:5": (3200, 4),
"norm_tensor:6": (800, 4),
"norm_tensor:7": (12800, 10),
"norm_tensor:8": (3200, 10),
"norm_tensor:9": (800, 10),
},
},
"recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
}
class RknnSession:
def __init__(self, model_path: Path) -> None:
self.model_type = "detection" if "detection" in model_path.parts else "recognition"
self.tpe = settings.rknn_threads
log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")
def get_inputs(self) -> list[SessionNode]:
return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["input"].items()]
def get_outputs(self) -> list[SessionNode]:
return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["output"].items()]
def run(
self,
output_names: list[str] | None,
input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
run_options: Any = None,
) -> list[NDArray[np.float32]]:
input_data: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
self.rknnpool.put(input_data)
res = self.rknnpool.get()
if res is None:
raise RuntimeError("RKNN inference failed!")
return res
class RknnNode(NamedTuple):
name: str | None
shape: tuple[int, ...]
__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]

View File

@@ -0,0 +1,91 @@
# This code is from leafqycc/rknn-multi-threaded
# Following Apache License 2.0
import logging
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Callable
import numpy as np
from numpy.typing import NDArray
from app.config import log
from app.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS
def get_soc(device_tree_path: Path | str) -> str | None:
try:
with Path(device_tree_path).open() as f:
device_compatible_str = f.read()
for soc in RKNN_SUPPORTED_SOCS:
if soc in device_compatible_str:
return soc
log.warning("Device is not supported for RKNN")
except OSError as e:
log.warning(f"Could not read {device_tree_path}. Reason: %s", e)
return None
soc_name = None
is_available = False
try:
from rknnlite.api import RKNNLite
soc_name = get_soc("/proc/device-tree/compatible")
is_available = soc_name is not None
except ImportError:
log.debug("RKNN is not available")
def init_rknn(model_path: str) -> "RKNNLite":
if not is_available:
raise RuntimeError("rknn is not available!")
rknn_lite = RKNNLite()
rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
ret = rknn_lite.load_rknn(model_path)
if ret != 0:
raise RuntimeError("Failed to load RKNN model")
if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
else:
ret = rknn_lite.init_runtime() # Please do not set this parameter on other platforms.
if ret != 0:
raise RuntimeError("Failed to inititalize RKNN runtime environment")
return rknn_lite
class RknnPoolExecutor:
def __init__(
self,
model_path: str,
tpes: int,
func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
) -> None:
self.tpes = tpes
self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
self.pool = ThreadPoolExecutor(max_workers=tpes)
self.func = func
self.num = 0
def put(self, inputs: list[NDArray[np.float32]]) -> None:
self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
self.num += 1
def get(self) -> list[NDArray[np.float32]] | None:
if self.queue.empty():
return None
fut = self.queue.get()
return fut.result()
def release(self) -> None:
self.pool.shutdown()
for rknn_lite in self.rknn_pool:
rknn_lite.release()
def __del__(self) -> None:
self.release()

View File

@@ -25,6 +25,7 @@ from app.models.facial_recognition.detection import FaceDetector
from app.models.facial_recognition.recognition import FaceRecognizer
from app.sessions.ann import AnnSession
from app.sessions.ort import OrtSession
from app.sessions.rknn import RknnSession, run_inference
from .config import Settings, settings
from .models.base import InferenceModel
@@ -69,6 +70,14 @@ class TestBase:
assert encoder.model_format == ModelFormat.ARMNN
def test_sets_default_model_format_to_rknn_if_available(self, mocker: MockerFixture) -> None:
mocker.patch.object(settings, "rknn", True)
mocker.patch("app.sessions.rknn.is_available", True)
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.model_format == ModelFormat.RKNN
def test_casts_cache_dir_string_to_path(self) -> None:
cache_dir = "/test_cache"
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir)
@@ -125,7 +134,7 @@ class TestBase:
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=["*.armnn"],
ignore_patterns=["*.armnn", "*.rknn"],
)
def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
@@ -136,7 +145,18 @@ class TestBase:
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=[],
ignore_patterns=["*.rknn"],
)
def test_download_downloads_rknn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.RKNN)
encoder.download()
snapshot_download.assert_called_once_with(
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=["*.armnn"],
)
def test_throws_exception_if_model_path_does_not_exist(
@@ -328,6 +348,33 @@ class TestAnnSession:
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestRknnSession:
def test_creates_rknn_session(self, rknn_session: mock.Mock, info: mock.Mock, mocker: MockerFixture) -> None:
model_path = mock.MagicMock(spec=Path)
tpe = 1
mocker.patch("app.sessions.rknn.soc_name", "rk3566")
mocker.patch("app.sessions.rknn.is_available", True)
RknnSession(model_path)
rknn_session.assert_called_once_with(model_path=model_path.as_posix(), tpes=tpe, func=run_inference)
info.assert_has_calls([mock.call(f"Loaded RKNN model from {model_path} with {tpe} threads.")])
def test_run_rknn(self, rknn_session: mock.Mock, mocker: MockerFixture) -> None:
rknn_session.return_value.load.return_value = 123
np_spy = mocker.spy(np, "ascontiguousarray")
mocker.patch("app.sessions.rknn.soc_name", "rk3566")
session = RknnSession(Path("ViT-B-32__openai"))
[input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)]
input_feed = {"input.1": input1, "input.2": input2}
session.run(None, input_feed)
rknn_session.return_value.put.assert_called_once_with([input1, input2])
np_spy.call_count == 2
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@@ -829,9 +876,7 @@ class TestLoad:
mock_model.clear_cache.assert_not_called()
mock_model.load.assert_not_called()
async def test_falls_back_to_onnx_if_other_format_does_not_exist(
self, exception: mock.Mock, warning: mock.Mock
) -> None:
async def test_falls_back_to_onnx_if_other_format_does_not_exist(self, warning: mock.Mock) -> None:
mock_model = mock.Mock(spec=InferenceModel)
mock_model.model_name = "test_model_name"
mock_model.model_type = ModelType.VISUAL
@@ -846,8 +891,9 @@ class TestLoad:
mock_model.clear_cache.assert_not_called()
assert mock_model.load.call_count == 2
exception.assert_called_once_with(error)
warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.")
warning.assert_called_once_with(
"ARMNN is available, but model 'test_model_name' does not support it.", exc_info=error
)
mock_model.model_format = ModelFormat.ONNX