Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions packages/ai/src/ai/common/models/audio/whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,20 @@ def load(
compute_type=compute_type,
)
except Exception as e:
logger.error(f'Failed to load whisper model: {e}')
raise Exception(f'Failed to load whisper model {model_name}: {e}')
if torch_device != 'cpu':
logger.warning(f'Whisper GPU load failed ({e}), falling back to CPU')
torch_device = 'cpu'
gpu_index = -1
if compute_type == 'float16':
compute_type = 'int8'
try:
model = WhisperModel(model_name, device='cpu', compute_type=compute_type)
except Exception as cpu_e:
logger.error(f'Failed to load whisper model on CPU: {cpu_e}')
raise Exception(f'Failed to load whisper model {model_name}: {cpu_e}')
else:
logger.error(f'Failed to load whisper model: {e}')
raise Exception(f'Failed to load whisper model {model_name}: {e}')
Comment on lines +273 to +278

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | ⚡ Quick win

Preserve exception chain with raise ... from for better debugging.

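The exception handling raises a new Exception inside an except block without explicit chaining. Python still attaches the original error implicitly (as __context__), but the traceback then reads "During handling of the above exception, another exception occurred", which looks like a bug in the handler itself. Python best practice is to use raise ... from so the original exception is recorded as the direct cause (__cause__).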

♻️ Proposed fix
                 except Exception as cpu_e:
                     logger.error(f'Failed to load whisper model on CPU: {cpu_e}')
-                    raise Exception(f'Failed to load whisper model {model_name}: {cpu_e}')
+                    raise Exception(f'Failed to load whisper model {model_name}: {cpu_e}') from cpu_e
             else:
                 logger.error(f'Failed to load whisper model: {e}')
-                raise Exception(f'Failed to load whisper model {model_name}: {e}')
+                raise Exception(f'Failed to load whisper model {model_name}: {e}') from e
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@packages/ai/src/ai/common/models/audio/whisper.py` around lines 273 - 278,
The except blocks in the whisper model loader lose the original traceback by
re-raising new Exceptions; update the two raise statements in the Whisper
loading logic (the except Exception as cpu_e branch and the outer except as e
branch) to use exception chaining (raise Exception(f'Failed to load whisper
model {model_name}: {cpu_e}') from cpu_e and raise Exception(f'Failed to load
whisper model {model_name}: {e}') from e respectively), keeping the existing
logger.error calls (logger.error(...)) and message content intact so the
original exceptions cpu_e and e are preserved in the chain.


# Bundle model
model_bundle = {
Expand Down
11 changes: 8 additions & 3 deletions packages/ai/src/ai/common/models/gliner/gliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,17 @@ def load(
model.eval()
else:
# === LOCAL MODE: Load directly to specified device ===
if device is None:
# Auto-detect
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

if device is None:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

if 'cuda' in str(device):
dev_idx = int(device.split(':')[1]) if ':' in str(device) else 0
if not probe_cuda(dev_idx):
logger.warning(f'CUDA device {dev_idx} kernel probe failed, falling back to CPU')
device = 'cpu'

logger.info(f'Loading GLiNER {model_name} to {device}')
model = GLiNERModel.from_pretrained(model_name)
model.to(device)
Expand Down
7 changes: 6 additions & 1 deletion packages/ai/src/ai/common/models/ocr/doctr.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def load(
from ai.common.opencv import cv2 # noqa: F401

from doctr.models import ocr_predictor
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

exclude_gpus = exclude_gpus or []
memory_gb = 2.0
Expand All @@ -86,6 +86,11 @@ def load(
gpu_index = 0
torch_device = 'cuda:0'

if torch_device != 'cpu' and not probe_cuda(gpu_index):
logger.warning(f'CUDA device {gpu_index} kernel probe failed, falling back to CPU')
torch_device = 'cpu'
gpu_index = -1

logger.info(f'Loading docTR with det={detection_model}, rec={recognition_model}')

try:
Expand Down
22 changes: 19 additions & 3 deletions packages/ai/src/ai/common/models/ocr/easyocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def load(
from ai.common.opencv import cv2 # noqa: F401

import easyocr
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

languages = languages or ['en']
exclude_gpus = exclude_gpus or []
Expand Down Expand Up @@ -126,6 +126,12 @@ def load(
torch_device = 'cuda:0'
use_gpu = True

if use_gpu and not probe_cuda(gpu_index):
logger.warning(f'CUDA device {gpu_index} kernel probe failed, falling back to CPU for EasyOCR')
use_gpu = False
gpu_index = -1
torch_device = 'cpu'

logger.info(f'Loading EasyOCR with languages {languages} on {torch_device}')

try:
Expand All @@ -135,8 +141,18 @@ def load(
verbose=False,
)
except Exception as e:
logger.error(f'Failed to load EasyOCR: {e}')
raise Exception(f'Failed to load EasyOCR: {e}')
if use_gpu:
logger.warning(f'EasyOCR GPU load failed ({e}), falling back to CPU')
gpu_index = -1
torch_device = 'cpu'
try:
reader = easyocr.Reader(languages, gpu=False, verbose=False)
except Exception as cpu_e:
logger.error(f'Failed to load EasyOCR: {cpu_e}')
raise Exception(f'Failed to load EasyOCR: {cpu_e}')
else:
logger.error(f'Failed to load EasyOCR: {e}')
raise Exception(f'Failed to load EasyOCR: {e}')
Comment on lines +150 to +155

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | ⚡ Quick win

Preserve exception chain with raise ... from for better debugging.

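Both exception handlers create new Exception instances without explicitly chaining the original exception. Python still attaches the original implicitly (as __context__), but the traceback presents it as an error that occurred while handling another one rather than as the root cause. Use raise ... from to record the original exception as the direct cause (__cause__).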

♻️ Proposed fix
                 except Exception as cpu_e:
                     logger.error(f'Failed to load EasyOCR: {cpu_e}')
-                    raise Exception(f'Failed to load EasyOCR: {cpu_e}')
+                    raise Exception(f'Failed to load EasyOCR: {cpu_e}') from cpu_e
             else:
                 logger.error(f'Failed to load EasyOCR: {e}')
-                raise Exception(f'Failed to load EasyOCR: {e}')
+                raise Exception(f'Failed to load EasyOCR: {e}') from e
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@packages/ai/src/ai/common/models/ocr/easyocr.py` around lines 150 - 155, The
except blocks that currently re-raise new Exception objects lose the original
traceback; update the two re-raises in the EasyOCR loading logic to use "raise
Exception(... ) from <original_exception>" so the chain is preserved (use "from
cpu_e" for the cpu_e handler and "from e" for the outer handler), leaving the
logger.error calls intact and referring to the same variables (cpu_e and e) so
full exception context is retained for debugging.

Comment on lines +153 to +155

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | 💤 Low value

Clarify error message when probe-triggered fallback fails.

When probe_cuda fails (line 129) and triggers fallback to CPU, use_gpu is set to False. If the subsequent easyocr.Reader creation on CPU also fails, the error log at line 154 reads "Failed to load EasyOCR: {e}" without indicating that GPU was attempted first. This could confuse debugging—reviewers might think GPU was never tried.

Consider logging a more specific message when CPU loading fails after a probe-triggered fallback, or tracking the fallback state to improve the diagnostic output.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@packages/ai/src/ai/common/models/ocr/easyocr.py` around lines 153 - 155, The
error message when EasyOCR fails to load on CPU should indicate that a GPU probe
was attempted and a fallback to CPU occurred; update the exception handling
around easyocr.Reader creation (the block that catches exceptions after
probe_cuda and sets use_gpu = False) to log and raise a message that includes
the fallback state (e.g., reference use_gpu and that probe_cuda was invoked) so
the logger.error and raised Exception include that CPU fallback was attempted
after a GPU probe failure; locate the probe_cuda invocation and the
easyocr.Reader construction to adjust the log text accordingly.


# EasyOCR wraps its detector and recognizer in DataParallel, which
# scatters every batch across ALL visible GPUs via parallel_apply().
Expand Down
7 changes: 6 additions & 1 deletion packages/ai/src/ai/common/models/ocr/surya.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def load(
from surya.foundation import FoundationPredictor # contract-check: ignore see comment above
from surya.recognition import RecognitionPredictor # contract-check: ignore see comment above
from surya.detection import DetectionPredictor # contract-check: ignore see comment above
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

languages = languages or ['en']
exclude_gpus = exclude_gpus or []
Expand All @@ -91,6 +91,11 @@ def load(
gpu_index = 0
torch_device = 'cuda:0'

if torch_device != 'cpu' and not probe_cuda(gpu_index):
logger.warning(f'CUDA device {gpu_index} kernel probe failed, falling back to CPU')
torch_device = 'cpu'
gpu_index = -1

logger.info(f'Loading Surya OCR on {torch_device}')

try:
Expand Down
7 changes: 6 additions & 1 deletion packages/ai/src/ai/common/models/ocr/trocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def load(
# disable contract check for craft_text_detector due to opencv conflict (see README)
from craft_text_detector import Craft # contract-check: ignore requirements_trocr.txt is `disable`d
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

exclude_gpus = exclude_gpus or []

Expand All @@ -113,6 +113,11 @@ def load(
gpu_index = 0
torch_device = 'cuda:0'

if torch_device != 'cpu' and not probe_cuda(gpu_index):
logger.warning(f'CUDA device {gpu_index} kernel probe failed, falling back to CPU')
torch_device = 'cpu'
gpu_index = -1

logger.info(f'Loading TrOCR pipeline on {torch_device}')

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,17 @@ def load(
model.eval()
else:
# === LOCAL MODE: Load directly to specified device ===
if device is None:
# Auto-detect
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

if device is None:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

if 'cuda' in str(device):
dev_idx = int(device.split(':')[1]) if ':' in str(device) else 0
if not probe_cuda(dev_idx):
logger.warning(f'CUDA device {dev_idx} kernel probe failed, falling back to CPU')
device = 'cpu'

logger.info(f'Loading SentenceTransformer {model_name} to {device}')
model = ST(model_name_or_path=model_name, device=device, **kwargs)
model.eval()
Expand Down
14 changes: 12 additions & 2 deletions packages/ai/src/ai/common/models/transformers/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _load_model(
) -> Tuple[Any, Dict[str, Any], int]:
"""Load a transformers model with CPU-first loading."""
from transformers import AutoModel
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

# Enable trust_remote_code by default (can be overridden via kwargs)
kwargs.setdefault('trust_remote_code', True)
Expand Down Expand Up @@ -157,6 +157,12 @@ def _load_model(
if device is None:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

if 'cuda' in str(device):
dev_idx = int(device.split(':')[1]) if ':' in str(device) else 0
if not probe_cuda(dev_idx):
logger.warning(f'CUDA device {dev_idx} kernel probe failed, falling back to CPU')
device = 'cpu'

# Load directly to device
model = ModelClass.from_pretrained(model_name, **kwargs)
model = model.to(device)
Expand Down Expand Up @@ -190,7 +196,7 @@ def _load_pipeline(
) -> Tuple[Any, Dict[str, Any], int]:
"""Load a transformers pipeline."""
from transformers import pipeline as hf_pipeline
from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

# Enable trust_remote_code by default (can be overridden via kwargs)
kwargs.setdefault('trust_remote_code', True)
Expand All @@ -217,6 +223,10 @@ def _load_pipeline(
elif device == 'cuda':
device = 0

if device >= 0 and not probe_cuda(device):
logger.warning(f'CUDA device {device} kernel probe failed, falling back to CPU')
device = -1

pipe = hf_pipeline(task=task, model=model_name, device=device, **kwargs)
gpu_index = device if device >= 0 else -1
memory_gb = TransformersLoader._estimate_memory(model_name, task=task)
Expand Down
7 changes: 6 additions & 1 deletion packages/ai/src/ai/common/models/vision/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def load(
"""
VisionLoader._ensure_dependencies()

from ai.common.torch import torch
from ai.common.torch import torch, probe_cuda

variant = (variant or 'clip').lower()
if variant not in ('clip', 'vit'):
Expand All @@ -77,6 +77,11 @@ def load(
else:
if device is None:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
if 'cuda' in str(device):
dev_idx = int(device.split(':')[1]) if ':' in str(device) else 0
if not probe_cuda(dev_idx):
logger.warning(f'CUDA device {dev_idx} kernel probe failed, falling back to CPU')
device = 'cpu'
gpu_index = int(device.split(':')[1]) if ':' in str(device) else (0 if device == 'cuda' else -1)

if variant == 'clip':
Expand Down
24 changes: 23 additions & 1 deletion packages/ai/src/ai/common/torch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,26 @@
else:
debug(' GPU processing disabled. Recommend using GPU for better performance.')

__all__ = ['torch']

def probe_cuda(device_index: int = 0) -> bool:
    """Check whether real compute kernels can run on a CUDA device.

    A tiny matrix multiply is launched on ``cuda:<device_index>`` and the
    device is then synchronized, so an asynchronous CUDA failure is raised
    inside this probe instead of on the first real inference call. The
    motivating failure is cudaErrorNoKernelImageForDevice, which appears when
    the PyTorch build ships no kernel binary for the device's compute
    capability (e.g. Pascal sm_61 on a Quadro P620).

    Args:
        device_index: Index of the CUDA device to probe.

    Returns:
        True when CUDA is available and the probe kernel ran without raising;
        False when CUDA is unavailable or any exception occurred in the probe.
    """
    cuda_usable = torch.cuda.is_available()
    if cuda_usable:
        target = f'cuda:{device_index}'
        try:
            sample = torch.randn(2, 2, device=target)
            # The matmul is what actually puts a compute kernel on the device.
            torch.matmul(sample, sample)
            # Block until the kernel finishes so deferred errors surface now.
            torch.cuda.synchronize(target)
        except Exception:
            # Best-effort probe: any failure means "do not use this device".
            cuda_usable = False
    return cuda_usable


__all__ = ['torch', 'probe_cuda']
Loading