Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/dstack/_internal/cli/services/configurators/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,9 @@ def validate_gpu_vendor_and_image(self, conf: RunConfigurationT) -> None:
return
if gpu_spec.count.max == 0:
return
# No specific GPU requested (default: 0..)
if gpu_spec.name is None and gpu_spec.vendor is None and gpu_spec.count.min == 0:
return
Comment thread
peterschmidt85 marked this conversation as resolved.
Outdated
has_amd_gpu: bool
has_tt_gpu: bool
vendor = gpu_spec.vendor
Expand Down
6 changes: 5 additions & 1 deletion src/dstack/_internal/core/models/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ def _vendor_from_string(cls, v: str) -> gpuhunt.AcceleratorVendor:
return gpuhunt.AcceleratorVendor.cast(v)


# Default GPU requirement: a count range of 0.. (minimum 0, no upper bound).
# No other GPUSpec field is set explicitly here.
DEFAULT_GPU_SPEC = GPUSpec(count=Range[int](min=0, max=None))


class DiskSpecConfig(CoreConfig):
@staticmethod
def schema_extra(schema: Dict[str, Any]):
Expand Down Expand Up @@ -387,7 +390,8 @@ class ResourcesSpec(generate_dual_core_model(ResourcesSpecConfig)):
"you may need to configure this"
),
] = None
gpu: Annotated[Optional[GPUSpec], Field(description="The GPU requirements")] = None
# Optional for backward compatibility
gpu: Annotated[Optional[GPUSpec], Field(description="The GPU requirements")] = DEFAULT_GPU_SPEC
disk: Annotated[Optional[DiskSpec], Field(description="The disk resources")] = DEFAULT_DISK

def pretty_format(self) -> str:
Expand Down
24 changes: 5 additions & 19 deletions src/dstack/_internal/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,22 +97,7 @@ def pretty_resources(
compute_capability: Optional[Any] = None,
disk_size: Optional[Any] = None,
) -> str:
"""
>>> pretty_resources(cpus=4, memory="16GB")
'4xCPU, 16GB'
>>> pretty_resources(cpus=4, memory="16GB", gpu_count=1)
'4xCPU, 16GB, 1xGPU'
>>> pretty_resources(cpus=4, memory="16GB", gpu_count=1, gpu_name='A100')
'4xCPU, 16GB, 1xA100'
>>> pretty_resources(cpus=4, memory="16GB", gpu_count=1, gpu_name='A100', gpu_memory="40GB")
'4xCPU, 16GB, 1xA100 (40GB)'
>>> pretty_resources(cpus=4, memory="16GB", gpu_count=1, total_gpu_memory="80GB")
'4xCPU, 16GB, 1xGPU (total 80GB)'
>>> pretty_resources(cpus=4, memory="16GB", gpu_count=2, gpu_name='A100', gpu_memory="40GB", total_gpu_memory="80GB")
'4xCPU, 16GB, 2xA100 (40GB, total 80GB)'
>>> pretty_resources(gpu_count=1, compute_capability="8.0")
'1xGPU (8.0)'
"""
"""Format resource requirements as a human-readable string."""
parts = []
if cpus is not None:
cpu_arch_lower: Optional[str] = None
Expand All @@ -131,7 +116,6 @@ def pretty_resources(
parts.append(f"disk={disk_size}")
if gpu_count:
gpu_parts = []
gpu_parts.append(f"{gpu_name or 'gpu'}")
if gpu_memory is not None:
gpu_parts.append(f"{gpu_memory}")
if gpu_count is not None:
Expand All @@ -141,8 +125,10 @@ def pretty_resources(
if compute_capability is not None:
gpu_parts.append(f"{compute_capability}")

gpu = ":".join(gpu_parts)
parts.append(gpu)
if gpu_name:
parts.append("gpu=" + ":".join([f"{gpu_name}"] + gpu_parts))
else:
parts.append("gpu=" + ":".join(gpu_parts))
return " ".join(parts)


Expand Down
5 changes: 4 additions & 1 deletion src/tests/_internal/cli/services/configurators/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ def validate(self, conf: BaseRunConfiguration) -> None:
def test_no_gpu(self):
    """A configuration that requests no GPU carries a GPU spec, not None.

    The spec is expected to name no vendor and no model, and to allow
    zero GPUs (count.min == 0).
    """
    conf = self.prepare_conf()
    # validate() is expected not to raise for a configuration without a GPU.
    self.validate(conf)
    gpu = conf.resources.gpu
    assert gpu is not None
    assert gpu.vendor is None
    assert gpu.name is None
    assert gpu.count.min == 0

def test_zero_gpu(self):
conf = self.prepare_conf(gpu_spec="0")
Expand Down
27 changes: 24 additions & 3 deletions src/tests/_internal/server/routers/test_fleets.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,14 @@ async def test_creates_fleet(self, test_db, session: AsyncSession, client: Async
"cpu": {"min": 2, "max": None},
"memory": {"min": 8.0, "max": None},
"shm_size": None,
"gpu": None,
"gpu": {
"vendor": None,
"name": None,
"count": {"min": 0, "max": None},
"memory": None,
"total_memory": None,
"compute_capability": None,
},
"disk": {"size": {"min": 100.0, "max": None}},
},
"backends": None,
Expand Down Expand Up @@ -467,7 +474,14 @@ async def test_creates_ssh_fleet(self, test_db, session: AsyncSession, client: A
"cpu": {"min": 2, "max": None},
"memory": {"min": 8.0, "max": None},
"shm_size": None,
"gpu": None,
"gpu": {
"vendor": None,
"name": None,
"count": {"min": 0, "max": None},
"memory": None,
"total_memory": None,
"compute_capability": None,
},
"disk": {"size": {"min": 100.0, "max": None}},
},
"backends": None,
Expand Down Expand Up @@ -639,7 +653,14 @@ async def test_updates_ssh_fleet(self, test_db, session: AsyncSession, client: A
"cpu": {"min": 2, "max": None},
"memory": {"min": 8.0, "max": None},
"shm_size": None,
"gpu": None,
"gpu": {
"vendor": None,
"name": None,
"count": {"min": 0, "max": None},
"memory": None,
"total_memory": None,
"compute_capability": None,
},
"disk": {"size": {"min": 100.0, "max": None}},
},
"backends": None,
Expand Down
60 changes: 60 additions & 0 deletions src/tests/_internal/utils/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
make_proxy_url,
parse_memory,
pretty_date,
pretty_resources,
sizeof_fmt,
)

Expand Down Expand Up @@ -239,6 +240,65 @@ def test_make_proxy_url(server_url, proxy_url, expected_url):
assert make_proxy_url(server_url, proxy_url) == expected_url


class TestPrettyResources:
    """Checks the `key=value` strings produced by `pretty_resources`."""

    def test_cpu_and_memory(self):
        """CPU and memory only."""
        rendered = pretty_resources(cpus=4, memory="16GB")
        assert rendered == "cpu=4 mem=16GB"

    def test_gpu_count_without_name(self):
        """A GPU count with no model name renders as a bare count."""
        rendered = pretty_resources(cpus=4, memory="16GB", gpu_count=1)
        assert rendered == "cpu=4 mem=16GB gpu=1"

    def test_gpu_count_with_name(self):
        """The model name precedes the count."""
        rendered = pretty_resources(cpus=4, memory="16GB", gpu_count=1, gpu_name="A100")
        assert rendered == "cpu=4 mem=16GB gpu=A100:1"

    def test_gpu_with_name_and_memory(self):
        """Per-GPU memory sits between the name and the count."""
        rendered = pretty_resources(
            cpus=4,
            memory="16GB",
            gpu_count=1,
            gpu_name="A100",
            gpu_memory="40GB",
        )
        assert rendered == "cpu=4 mem=16GB gpu=A100:40GB:1"

    def test_gpu_with_total_memory_without_name(self):
        """Total GPU memory follows the count."""
        rendered = pretty_resources(
            cpus=4,
            memory="16GB",
            gpu_count=1,
            total_gpu_memory="80GB",
        )
        assert rendered == "cpu=4 mem=16GB gpu=1:80GB"

    def test_gpu_with_name_memory_and_total_memory(self):
        """Name, per-GPU memory, count and total memory together."""
        rendered = pretty_resources(
            cpus=4,
            memory="16GB",
            gpu_count=2,
            gpu_name="A100",
            gpu_memory="40GB",
            total_gpu_memory="80GB",
        )
        assert rendered == "cpu=4 mem=16GB gpu=A100:40GB:2:80GB"

    def test_gpu_with_compute_capability(self):
        """Compute capability is appended after the count."""
        rendered = pretty_resources(gpu_count=1, compute_capability="8.0")
        assert rendered == "gpu=1:8.0"

    def test_disk(self):
        """Disk size renders as its own `disk=` part."""
        rendered = pretty_resources(cpus=2, memory="8GB", disk_size="100GB")
        assert rendered == "cpu=2 mem=8GB disk=100GB"

    def test_no_gpu(self):
        """No `gpu=` part appears when no GPU arguments are passed."""
        rendered = pretty_resources(cpus=2, memory="8GB")
        assert rendered == "cpu=2 mem=8GB"

    def test_gpu_zero_count_range(self):
        """The default GPU count range (0..) is displayed as gpu=0.."""
        rendered = pretty_resources(
            cpus=2,
            memory="8GB",
            disk_size="100GB",
            gpu_count="0..",
        )
        assert rendered == "cpu=2 mem=8GB disk=100GB gpu=0.."


class TestSizeofFmt:
@pytest.mark.parametrize(
("num", "suffix", "expected"),
Expand Down