Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions omnivoice/utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import numpy as np
import torch
import torchaudio
import soundfile as sf
from pydub import AudioSegment
from pydub.silence import detect_leading_silence, detect_nonsilent, split_on_silence

Expand All @@ -42,9 +43,8 @@ def load_audio(audio_path: str, sampling_rate: int):
PyTorch tensor of shape (1, T)
"""
try:
waveform, prompt_sampling_rate = torchaudio.load(
audio_path, backend="soundfile"
)
data, prompt_sampling_rate = sf.read(audio_path, dtype="float32")
waveform = torch.from_numpy(data.T if data.ndim > 1 else data.reshape(1, -1))
except (RuntimeError, OSError):
# Fallback via pydub+ffmpeg for formats soundfile can't handle
aseg = AudioSegment.from_file(audio_path)
Expand Down
26 changes: 21 additions & 5 deletions pyproject.toml

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can avoid requiring users to uncomment lines by using optional dependencies, as in #143.

Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,29 @@ Repository = "https://github.com/k2-fsa/OmniVoice"
# Install PyTorch with CUDA support on Linux/Windows (CUDA doesn't exist for Mac).
# NOTE: We must explicitly request them as `dependencies` above. These improved
# versions will not be selected if they're only third-party dependencies.

# =========================================================
# IMPORTANT: Keep exactly one block below active, matching your graphics card.
# The NVIDIA block is enabled by default. For an AMD card, comment out the
# NVIDIA block and uncomment the AMD block.
# =========================================================

# FOR NVIDIA:
torch = [
{ index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]
torchaudio = [
{ index = "pytorch-cuda", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]

# FOR AMD:
#torch = [
# { index = "pytorch-rocm", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
#]
#torchaudio = [
# { index = "pytorch-rocm", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
#]

[[tool.uv.index]]
name = "pytorch-cuda"
# Use PyTorch built for NVIDIA CUDA Toolkit version 12.8.
Expand All @@ -83,11 +99,11 @@ url = "https://download.pytorch.org/whl/cu128"
# Only use this index when explicitly requested by `tool.uv.sources`.
explicit = true

[tool.uv]
constraint-dependencies = [
"torch==2.8.0",
"torchaudio==2.8.0",
]
[[tool.uv.index]]
name = "pytorch-rocm"
# For AMD (e.g., RX 7900 series / GFX 1100 architecture)
url = "https://rocm.nightlies.amd.com/v2/gfx110X-all/"
explicit = true

[tool.hatch.build.targets.sdist]
include = ["omnivoice"]
Expand Down