From dfa0aef6d0171fc7ced791412d544924623e899c Mon Sep 17 00:00:00 2001 From: Alexander-Ger-Reich <50119493+Alexander-Ger-Reich@users.noreply.github.com> Date: Sun, 12 Apr 2026 19:29:14 +0200 Subject: [PATCH 1/3] AMD rocm Fix 1 --- pyproject.toml | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c0f0c32e..2d44c807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,10 +83,16 @@ url = "https://download.pytorch.org/whl/cu128" # Only use this index when explicitly requested by `tool.uv.sources`. explicit = true +[[tool.uv.index]] +name = "pytorch-rocm" +# For AMD (e.g., RX 7900 series / GFX 1100 architecture) +url = "https://rocm.nightlies.amd.com/v2/gfx110X-all/" +explicit = true + [tool.uv] constraint-dependencies = [ - "torch==2.8.0", - "torchaudio==2.8.0", + "torch", + "torchaudio", ] [tool.hatch.build.targets.sdist] @@ -94,3 +100,19 @@ include = ["omnivoice"] [tool.hatch.build.targets.wheel] packages = ["omnivoice"] + +# ========================================================= +# IMPORTANT: Select the correct graphics card here! +# Comment out the block for your current card +# and uncomment the other one. 
+# ========================================================= + +# FOR NVIDIA: +[tool.uv.sources] +torch = { index = "pytorch-cuda" } +torchaudio = { index = "pytorch-cuda" } + +# FOR AMD: +# [tool.uv.sources] +# torch = { index = "pytorch-rocm" } +# torchaudio = { index = "pytorch-rocm" } From 0261153c80bef9fcd5be3b8d5c556120fcc015e0 Mon Sep 17 00:00:00 2001 From: Alexander-Ger-Reich <50119493+Alexander-Ger-Reich@users.noreply.github.com> Date: Sun, 12 Apr 2026 20:50:28 +0200 Subject: [PATCH 2/3] AMD rocm Fix 2 --- omnivoice/utils/audio.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/omnivoice/utils/audio.py b/omnivoice/utils/audio.py index 9fcb2c2c..661e720b 100644 --- a/omnivoice/utils/audio.py +++ b/omnivoice/utils/audio.py @@ -25,6 +25,7 @@ import numpy as np import torch import torchaudio +import soundfile as sf from pydub import AudioSegment from pydub.silence import detect_leading_silence, detect_nonsilent, split_on_silence @@ -42,9 +43,8 @@ def load_audio(audio_path: str, sampling_rate: int): PyTorch tensor of shape (1, T) """ try: - waveform, prompt_sampling_rate = torchaudio.load( - audio_path, backend="soundfile" - ) + data, prompt_sampling_rate = sf.read(audio_path, dtype="float32") + waveform = torch.from_numpy(data.T if data.ndim > 1 else data.reshape(1, -1)) except (RuntimeError, OSError): # Fallback via pydub+ffmpeg for formats torchaudio can't handle aseg = AudioSegment.from_file(audio_path) From 338282894724365484ed24cba24197e8e3829888 Mon Sep 17 00:00:00 2001 From: Alexander-Ger-Reich <50119493+Alexander-Ger-Reich@users.noreply.github.com> Date: Sun, 12 Apr 2026 20:54:05 +0200 Subject: [PATCH 3/3] AMD rocm Fix 3 --- pyproject.toml | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2d44c807..0c1255fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,14 @@ Repository = "https://github.com/k2-fsa/OmniVoice" # Install 
PyTorch with CUDA support on Linux/Windows (CUDA doesn't exist for Mac). # NOTE: We must explicitly request them as `dependencies` above. These improved # versions will not be selected if they're only third-party dependencies. + +# ========================================================= +# IMPORTANT: Select the correct graphics card here! +# Exactly one block may be active: uncomment the block for +# your card and comment out the other one. +# ========================================================= + +# FOR NVIDIA: torch = [ { index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] @@ -75,6 +83,14 @@ torchaudio = [ { index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] +# FOR AMD: +#torch = [ +# { index = "pytorch-rocm", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +#] +#torchaudio = [ +# { index = "pytorch-rocm", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +#] + [[tool.uv.index]] name = "pytorch-cuda" # Use PyTorch built for NVIDIA Toolkit version 12.8. @@ -89,30 +105,8 @@ name = "pytorch-rocm" url = "https://rocm.nightlies.amd.com/v2/gfx110X-all/" explicit = true -[tool.uv] -constraint-dependencies = [ - "torch", - "torchaudio", -] - [tool.hatch.build.targets.sdist] include = ["omnivoice"] [tool.hatch.build.targets.wheel] packages = ["omnivoice"] - -# ========================================================= -# IMPORTANT: Select the correct graphics card here! -# Comment out the block for your current card -# and uncomment the other one. -# ========================================================= - -# FOR NVIDIA: -[tool.uv.sources] -torch = { index = "pytorch-cuda" } -torchaudio = { index = "pytorch-cuda" } - -# FOR AMD: -# [tool.uv.sources] -# torch = { index = "pytorch-rocm" } -# torchaudio = { index = "pytorch-rocm" }