Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
8cb01d3
feat(vision): add vision node suite and detection models
ryan-t-christensen May 11, 2026
fd406d9
feat(vision): add dense_resize image util for memory-bounded inference
ryan-t-christensen Jun 3, 2026
d0d77f4
feat(vision): add detection model loaders (RF-DETR, MM-G-DINO, Mask2F…
ryan-t-christensen Jun 3, 2026
a9598d3
refactor(detect): swap YOLO-World for RF-DETR + MM-Grounding-DINO
ryan-t-christensen Jun 3, 2026
db456be
refactor(detect_segment): swap SAM3 for Mask2Former instance + semantic
ryan-t-christensen Jun 3, 2026
9a3f8fd
refactor(depth_estimate): lock to Depth-Anything V2 Small, image-only
ryan-t-christensen Jun 3, 2026
269dd00
refactor(vision): specialize describe into caption-only Florence-2 node
ryan-t-christensen Jun 3, 2026
4ebd164
feat(vision): add face_detection node (MediaPipe BlazeFace)
ryan-t-christensen Jun 3, 2026
18b5695
feat(vision): add pose_estimation node (RTMPose via rtmlib)
ryan-t-christensen Jun 3, 2026
7b31269
feat(vision): add background_removal node (BiRefNet)
ryan-t-christensen Jun 3, 2026
57c6120
fix(ui): textarea helperText + sticky outlined label
ryan-t-christensen Jun 3, 2026
413a15a
chore(deps): pin transformers==4.53.3 for vision suite compatibility
ryan-t-christensen Jun 3, 2026
6be9708
chore(video_composer): add missing icon
ryan-t-christensen Jun 3, 2026
1d8a13a
fix(vision): address CodeRabbit review on PR #1081
ryan-t-christensen Jun 3, 2026
208fa35
refactor(face_detection): remove non-functional biometric-chain guard
ryan-t-christensen Jun 3, 2026
abe6d8c
refactor(face_detection): drop unused torch device-probe; add real fa…
asclearuc Jun 4, 2026
cf8e72c
feat(depth_estimate): serve via model server
asclearuc Jun 4, 2026
bd27820
@
asclearuc Jun 7, 2026
313e436
feat(vision): serve depth/detect/detect_segment via model server
asclearuc Jun 7, 2026
a2be783
feat(vision): serve caption + background_removal via model server
asclearuc Jun 9, 2026
ee7ac3e
refactor(vision): harden converted nodes + complete fulltest coverage
asclearuc Jun 9, 2026
fb5b84f
feat(pose_estimation): serve via model server; fix caption GPU dtype
asclearuc Jun 10, 2026
fe6b5ca
Merge branch 'develop' into feat/vision
asclearuc Jun 10, 2026
a1bcbf7
fix(depends): use engine_cache_dir for excludes.txt path
asclearuc Jun 10, 2026
421cd0e
fix(face_detection): prevent engine abort, handle missing libGLESv2
asclearuc Jun 10, 2026
fa26c7e
fix(face_detection): require libEGL.so.1; skip mediapipe in contract-…
asclearuc Jun 11, 2026
58d3581
fix(nodes): address CodeRabbit review findings
asclearuc Jun 11, 2026
af7174b
fix(nodes): address CodeRabbit review findings
asclearuc Jun 12, 2026
70b7690
fix(nodes): address CodeRabbit review findings
asclearuc Jun 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docker/Dockerfile.engine
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ RUN apt-get update \
libc++1 \
libc++abi1 \
libgomp1 \
libgles2 \
libegl1 \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd -r rocketride && useradd -r -g rocketride -d /opt/rocketride rocketride

Expand Down
73 changes: 73 additions & 0 deletions nodes/src/nodes/background_removal/IGlobal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# =============================================================================
# MIT License
#
# Copyright (c) 2026 Aparavi Software AG
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

import threading

from rocketlib import IGlobalBase, OPEN_MODE, warning
from ai.common.config import Config

# Bounds for the source long-edge cap (composite resolution).
# DEFAULT_MAX_EDGE is the fallback used when 'maxEdge' is absent or cannot be
# parsed as an int; whatever value results is then clamped to the inclusive
# range [MIN_MAX_EDGE, MAX_MAX_EDGE].
DEFAULT_MAX_EDGE = 1024
MIN_MAX_EDGE = 256
MAX_MAX_EDGE = 4096


class IGlobal(IGlobalBase):
    """Node-wide state for background_removal: model facade, lock and edge cap."""

    # BackgroundRemover facade; built in beginGlobal, stays None in CONFIG mode.
    remover = None
    # Lock created together with the facade; None until beginGlobal builds it.
    device_lock = None
    # Long-edge cap for the source image, kept within [MIN_MAX_EDGE, MAX_MAX_EDGE].
    max_edge = DEFAULT_MAX_EDGE

    def beginGlobal(self):
        """Build the shared BackgroundRemover facade and parse the clamped maxEdge config.

        Returns immediately, leaving all attributes at their class defaults,
        when the endpoint is opened in CONFIG mode.
        """
        if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
            return

        # Imported here, after the CONFIG-mode return, so that path never
        # imports the facade module.
        from ai.common.models.vision.background import BackgroundRemover, DEFAULT_MODEL

        node_cfg = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)

        model_name = (node_cfg.get('model') or '').strip()
        if not model_name:
            warning(f'background_removal: no model configured, using default {DEFAULT_MODEL}')
            model_name = DEFAULT_MODEL

        # max_edge: bound composite/source resolution so memory stays predictable.
        # OverflowError is caught as well because int() raises it for
        # non-finite floats (e.g. a JSON ``Infinity``); without it such a value
        # would abort startup instead of falling back to the default.
        try:
            requested_edge = int(node_cfg.get('maxEdge', DEFAULT_MAX_EDGE))
        except (TypeError, ValueError, OverflowError):
            requested_edge = DEFAULT_MAX_EDGE
        self.max_edge = min(MAX_MAX_EDGE, max(MIN_MAX_EDGE, requested_edge))

        # An empty or missing revision is passed on as None.
        revision = (node_cfg.get('revision') or '').strip() or None

        # device=None -> model server when --modelserver is set, else local.
        self.remover = BackgroundRemover(model_name=model_name, device=None, revision=revision)
        self.device_lock = threading.Lock()

    def endGlobal(self):
        """Disconnect the facade and release shared state on teardown.

        Shared references are cleared even when ``disconnect()`` raises; the
        exception itself still propagates to the caller.
        """
        try:
            if self.remover is not None:
                self.remover.disconnect()
        finally:
            self.remover = None
            self.device_lock = None
108 changes: 108 additions & 0 deletions nodes/src/nodes/background_removal/IInstance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# =============================================================================
# MIT License
#
# Copyright (c) 2026 Aparavi Software AG
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

import json

from rocketlib import IInstanceBase, AVI_ACTION, warning
from ai.common.image import Image, ImageProcessor
from ai.common.image.dense_resize import resize_for_inference
from .IGlobal import IGlobal


class IInstance(IInstanceBase):
    """
    IInstance handles background removal for the background_removal node.

    Accepts image lane (AVI stream). Emits per frame:
      - text lane:  JSON alpha stats {mean_alpha, alpha_coverage_pct}.
      - image lane: RGBA cutout PNG (straight, non-premultiplied alpha).

    The model facade (ai.common.models.vision.background) returns a raw alpha
    matte; source capping, RGBA compositing and stats are done here (node-side).
    """

    IGlobal: IGlobal

    def __init__(self, *args, **kwargs):
        """Initialize per-instance image-accumulation state."""
        super().__init__(*args, **kwargs)
        # Bytes of the frame currently being received; None when no frame is in flight.
        self._image_data = None

    def _emit(self, image):
        """Remove background for one image; write JSON stats (text) and an RGBA cutout (image).

        Returns without running inference when neither the text nor the image
        lane has a listener, since nothing would be written in that case.

        Args:
            image: Decoded input PIL image for this frame.
        """
        wants_text = self.instance.hasListener('text')
        wants_image = self.instance.hasListener('image')
        if not (wants_text or wants_image):
            return

        # Cap the source first; the cutout is emitted at this (capped) resolution.
        source_capped, _ = resize_for_inference(image.convert('RGB'), self.IGlobal.max_edge)

        with self.IGlobal.device_lock:
            alpha = self.IGlobal.remover.remove(source_capped)
        # NOTE(review): alpha is presumably a 2-D uint8 array sized like
        # source_capped (it is scaled by 255 and wrapped as an 'L' image below)
        # - confirm against the BackgroundRemover facade.

        if wants_text:
            import numpy as np

            # The float32 copy is only needed for the stats, so it is built
            # solely when the text lane is consumed.
            alpha_norm = alpha.astype(np.float32) / 255.0
            stats = {
                'mean_alpha': float(alpha_norm.mean()),
                'alpha_coverage_pct': float((alpha_norm > 0.5).mean() * 100.0),
            }
            self.instance.writeText(json.dumps(stats))

        if wants_image:
            # Straight (un-premultiplied) alpha avoids dark fringes when consumers
            # re-composite over a non-black background.
            r, g, b = source_capped.split()
            rgba = Image.merge('RGBA', (r, g, b, Image.fromarray(alpha, mode='L')))
            # Encode before opening the outbound stream so an encoding failure
            # cannot leave a BEGIN without its END.
            image_bytes = ImageProcessor.get_bytes(rgba)
            self.instance.writeImage(AVI_ACTION.BEGIN, 'image/png')
            self.instance.writeImage(AVI_ACTION.WRITE, 'image/png', image_bytes)
            self.instance.writeImage(AVI_ACTION.END, 'image/png')

    def writeImage(self, action: int, mimeType: str, buffer: bytes):
        """Accumulate an inbound image stream and run background removal on END.

        Args:
            action: AVI stream action (BEGIN/WRITE/END).
            mimeType: MIME type of the image chunk.
            buffer: Raw bytes for a WRITE action.

        Returns:
            preventDefault() on END to suppress default forwarding; None otherwise.
        """
        if action == AVI_ACTION.BEGIN:
            self._image_data = bytearray()
        elif action == AVI_ACTION.WRITE:
            if self._image_data is None:
                # WRITE without a preceding BEGIN: start a buffer rather than
                # failing with TypeError on ``None += buffer``.
                self._image_data = bytearray()
            self._image_data += buffer
        elif action == AVI_ACTION.END:
            try:
                image = ImageProcessor.load_image_from_bytes(self._image_data)
                self._emit(image)
            except Exception as exc:
                # Best effort: a frame that fails to decode or process is
                # logged and dropped instead of raising out of writeImage.
                warning(f'background_removal: dropping frame due to inference error: {exc}')
            finally:
                # Always reset so the next frame starts from a clean buffer.
                self._image_data = None
            return self.preventDefault()
9 changes: 9 additions & 0 deletions nodes/src/nodes/background_removal/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 Aparavi Software AG
# =============================================================================

# Re-export the node's IGlobal and IInstance classes as the package's public API.
from .IGlobal import IGlobal
from .IInstance import IInstance

__all__ = ['IGlobal', 'IInstance']
7 changes: 7 additions & 0 deletions nodes/src/nodes/background_removal/background-removal.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
113 changes: 113 additions & 0 deletions nodes/src/nodes/background_removal/services.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"title": "Background Removal",
"protocol": "background_removal://",
"classType": ["image"],
"capabilities": ["gpu", "experimental"],
"register": "filter",
"node": "python",
"path": "nodes.background_removal",
"prefix": "background_removal",
"description": ["Foreground / background separation using BiRefNet (MIT). Returns an RGBA cutout with a straight (non-premultiplied) alpha channel, so downstream nodes can re-composite over any background without dark fringes. ", "Two profiles are shipped: the default 1K BiRefNet and a higher-resolution 2K variant for fine hair / detailed edges. Runs on CPU, Apple Silicon (MPS), and CUDA."],
"icon": "background-removal.svg",
"documentation": "https://docs.rocketride.org",
"tile": ["Model: ${parameters.background_removal.profile}", "Max edge: ${parameters.background_removal.maxEdge}"],
"lanes": {
"image": ["text", "image"]
},
"input": [
{
"lane": "image",
"description": "AVI image stream to remove the background from.",
"output": [
{
"lane": "image",
"description": "RGBA PNG (straight alpha) as raw stream — pipe to any node accepting image input."
},
{
"lane": "text",
"description": "JSON alpha stats: {mean_alpha, alpha_coverage_pct}."
}
]
}
],
"preconfig": {
"default": "birefnet-default",
"profiles": {
"birefnet-default": {
"title": "BiRefNet — default, 1K (MIT)",
"model": "ZhengPeng7/BiRefNet",
"revision": "e2bf8e4460fc8fa32bba5ea4d94b3233d367b0e4",
"maxEdge": 1024
},
"birefnet-hr": {
"title": "BiRefNet HR — 2K, finer hair / edge detail (MIT)",
"model": "ZhengPeng7/BiRefNet_HR",
"revision": "a7a562f6fd16021180f2f4348f4de003a2d3d1e1",
"maxEdge": 2048
}
}
},
"fields": {
"background_removal.model": {
"type": "string",
"title": "Model",
"description": "HuggingFace model identifier for background removal (overrides the profile default)",
"minLength": 2,
"maxLength": 256
},
"background_removal.maxEdge": {
"type": "number",
"title": "Max input edge (px)",
"description": "Downscale source so long edge <= this value before inference; alpha is upsampled back to the (capped) source size for compositing. Lower = faster + less VRAM; higher = sharper edges.",
"default": 1024,
"minimum": 256,
"maximum": 4096
},
"background_removal.profile": {
"title": "Model",
"description": "BiRefNet variant — default is 1K, HR is 2K for finer edges.",
"type": "string",
"default": "birefnet-default",
"enum": ["*>preconfig.profiles.*.title"],
"conditional": [
{ "value": "birefnet-default", "properties": [] },
{ "value": "birefnet-hr", "properties": [] }
]
}
},
"shape": [
{
"section": "Pipe",
"title": "Background Removal",
"properties": ["background_removal.profile", "background_removal.maxEdge"]
}
],
"test": {
"profiles": ["birefnet-default"],
"outputs": ["text"],
"timeout": 180,
"cases": [
{
"name": "Remove background from image",
"image": "images/einstein.jpg",
"expect": {
"text": { "notEmpty": true, "contains": "mean_alpha" }
}
}
]
},
"fulltest": {
"profiles": ["birefnet-default", "birefnet-hr"],
"outputs": ["text"],
"timeout": 300,
"cases": [
{
"name": "Remove background from image",
"image": "images/einstein.jpg",
"expect": {
"text": { "notEmpty": true, "contains": "mean_alpha" }
}
}
]
}
}
60 changes: 60 additions & 0 deletions nodes/src/nodes/caption/IGlobal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# =============================================================================
# MIT License
#
# Copyright (c) 2026 Aparavi Software AG
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

import threading

from rocketlib import IGlobalBase, OPEN_MODE, warning
from ai.common.config import Config


class IGlobal(IGlobalBase):
    """Node-wide state for the caption node: the Captioner facade and its lock."""

    # Captioner facade; built in beginGlobal, stays None in CONFIG mode.
    captioner = None
    # Lock created together with the facade; None until beginGlobal builds it.
    device_lock = None

    def beginGlobal(self):
        """Build the shared Captioner facade from node config (model/task).

        Returns immediately, leaving all attributes at their class defaults,
        when the endpoint is opened in CONFIG mode.
        """
        if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
            return

        # Imported here, after the CONFIG-mode return, so that path never
        # imports the facade module.
        from ai.common.models.vision.caption import Captioner, DEFAULT_MODEL, DEFAULT_TASK

        config = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)

        model_name = (config.get('model') or '').strip()
        if not model_name:
            warning(f'caption: no model configured, using default {DEFAULT_MODEL}')
            model_name = DEFAULT_MODEL

        # ``or ''`` before str(): an explicit null task must fall back to
        # DEFAULT_TASK instead of becoming the literal string 'None'.
        task = str(config.get('task') or '').strip() or DEFAULT_TASK

        # An empty or missing revision is passed on as None.
        revision = (config.get('revision') or '').strip() or None

        # device=None -> model server when --modelserver is set, else local.
        self.captioner = Captioner(model_name=model_name, device=None, task=task, revision=revision)
        self.device_lock = threading.Lock()

    def endGlobal(self):
        """Disconnect the facade and release shared state on teardown.

        Shared references are cleared even when ``disconnect()`` raises; the
        exception itself still propagates to the caller.
        """
        try:
            if self.captioner is not None:
                self.captioner.disconnect()
        finally:
            self.captioner = None
            self.device_lock = None
Loading
Loading