k2-fsa · Alexander-Ger-Reich · Apr 12, 2026 · Apr 12, 2026 · Apr 12, 2026 · EnochPrime
diff --git a/omnivoice/utils/audio.py b/omnivoice/utils/audio.py
@@ -25,6 +25,7 @@
 import numpy as np
 import torch
 import torchaudio
+import soundfile as sf
 from pydub import AudioSegment
 from pydub.silence import detect_leading_silence, detect_nonsilent, split_on_silence
 
@@ -42,9 +43,8 @@ def load_audio(audio_path: str, sampling_rate: int):
         PyTorch tensor of shape (1, T)
     """
     try:
-        waveform, prompt_sampling_rate = torchaudio.load(
-            audio_path, backend="soundfile"
-        )
+        data, prompt_sampling_rate = sf.read(audio_path, dtype="float32")
+        waveform = torch.from_numpy(data.T if data.ndim > 1 else data.reshape(1, -1))
     except (RuntimeError, OSError):
         # Fallback via pydub+ffmpeg for formats torchaudio can't handle
         aseg = AudioSegment.from_file(audio_path)

diff --git a/pyproject.toml b/pyproject.toml
@@ -68,13 +68,29 @@ Repository = "https://github.com/k2-fsa/OmniVoice"
 # Install PyTorch with CUDA support on Linux/Windows (CUDA doesn't exist for Mac).
 # NOTE: We must explicitly request them as `dependencies` above. These improved
 # versions will not be selected if they're only third-party dependencies.
+
+# =========================================================
+# IMPORTANT: Select the correct graphics card here!
+# Keep the block for your graphics card uncommented
+# and comment out the other one.
+# =========================================================
+
+# FOR NVIDIA:
 torch = [
   { index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 torchaudio = [
   { index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 
+# FOR AMD:
+#torch = [
+#  { index = "pytorch-rocm", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+#]
+#torchaudio = [
+#  { index = "pytorch-rocm", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+#]
+
 [[tool.uv.index]]
 name = "pytorch-cuda"
 # Use PyTorch built for NVIDIA Toolkit version 12.8.
@@ -83,11 +99,11 @@ url = "https://download.pytorch.org/whl/cu128"
 # Only use this index when explicitly requested by `tool.uv.sources`.
 explicit = true
 
-[tool.uv]
-constraint-dependencies = [
-    "torch==2.8.0",
-    "torchaudio==2.8.0",
-]
+[[tool.uv.index]]
+name = "pytorch-rocm"
+# For AMD (e.g., RX 7900 series / GFX 1100 architecture)
+url = "https://rocm.nightlies.amd.com/v2/gfx110X-all/"
+explicit = true
 
 [tool.hatch.build.targets.sdist]
 include = ["omnivoice"]