Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions genmetaballs/src/cuda/bindings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,22 @@ template <MemoryLocation location>
void bind_fmb_scene(nb::module_& m, const char* name) {
nb::class_<FMBScene<location>>(m, name)
.def(nb::init<size_t>(), nb::arg("size"))
.def(nb::init<const std::vector<FMB>&, const std::vector<float>&>(), nb::arg("fmbs"),
nb::arg("log_weights"),
"Construct FMBScene from a list of FMBs and corresponding log weights")
.def_prop_ro("size", &FMBScene<location>::size)
.def("__len__", &FMBScene<location>::size)
.def("__getitem__", &FMBScene<location>::get_fmb, nb::arg("idx"),
"Get the (FMB, log_weight) tuple at index i")
.def(
"__getitem__",
// Convert cuda::std::tuple to std::tuple for nanobind
[](const FMBScene<location>& scene, size_t idx) {
const auto& [fmb, log_weight] = scene[idx];
// for device data, the types would be thrust::device_reference, which cannot be
// returned directly to Python. The static cast forces a copy (to host) to be made.
return std::make_tuple(static_cast<const FMB&>(fmb),
static_cast<const float&>(log_weight));
},
"Get the (FMB, log_weight) tuple at index i")
.def("__repr__", [=](const FMBScene<location>& scene) {
return nb::str("{}(size={})").format(name, scene.size());
});
Expand Down
22 changes: 0 additions & 22 deletions genmetaballs/src/cuda/core/fmb.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,3 @@ CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const {
const auto shifted_vec = vec - get_mean();
return dot(shifted_vec, cov_inv_apply(shifted_vec));
}

// Host specialisation: allocates `size` default-initialised FMBs and `size`
// (uninitialised) log weights on the heap. Both arrays are released by the
// matching destructor.
//
// Exception safety: if the second allocation throws, the destructor is NOT
// run (the object was never fully constructed), so the first array has to be
// released here before the exception is propagated.
template <>
__host__ FMBScene<MemoryLocation::HOST>::FMBScene(size_t size)
    : fmbs_{new FMB[size]}, log_weights_{nullptr}, size_{size} {
    try {
        log_weights_ = new float[size];
    } catch (...) {
        delete[] fmbs_;
        fmbs_ = nullptr;
        throw;
    }
}

// Device specialisation: allocates room for `size` FMBs and `size` log
// weights with cudaMalloc. The memory is released by the matching destructor.
//
// Both pointers start as nullptr so they never hold indeterminate values.
// If the second allocation check raises, the first buffer is freed before the
// error propagates; the destructor would not run for a partially constructed
// object.
// NOTE(review): this cleanup only matters if CUDA_CHECK reports failure by
// throwing; if it aborts the process the catch block is simply never reached.
template <>
__host__ FMBScene<MemoryLocation::DEVICE>::FMBScene(size_t size)
    : fmbs_{nullptr}, log_weights_{nullptr}, size_{size} {
    CUDA_CHECK(cudaMalloc(&fmbs_, size * sizeof(FMB)));
    try {
        CUDA_CHECK(cudaMalloc(&log_weights_, size * sizeof(float)));
    } catch (...) {
        // Best-effort cleanup: the allocation failure is the error worth
        // reporting, so the result of this cudaFree is deliberately ignored.
        (void)cudaFree(fmbs_);
        fmbs_ = nullptr;
        throw;
    }
}

// Host specialisation: hands both heap arrays back to the allocator.
// They are released in the reverse of the order the constructor acquired them.
template <>
__host__ FMBScene<MemoryLocation::HOST>::~FMBScene() {
    delete[] log_weights_;
    delete[] fmbs_;
}

// Device specialisation: releases the two buffers with cudaFree, checking
// the result of each call through CUDA_CHECK.
// NOTE(review): destructors are implicitly noexcept, so if CUDA_CHECK reports
// failure by throwing, a failed cudaFree here would terminate the program and
// the second buffer would never be freed -- confirm what the macro does.
template <>
__host__ FMBScene<MemoryLocation::DEVICE>::~FMBScene() {
CUDA_CHECK(cudaFree(fmbs_));
CUDA_CHECK(cudaFree(log_weights_));
}
94 changes: 37 additions & 57 deletions genmetaballs/src/cuda/core/fmb.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

#include <cuda/std/span>
#include <cuda/std/tuple>
#include <cuda_runtime.h>
#include <stdexcept>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <vector>

#include "geometry.cuh"
#include "utils.cuh"
Expand Down Expand Up @@ -46,74 +51,49 @@ public:
template <MemoryLocation location>
class FMBScene {
private:
FMB* fmbs_;
float* log_weights_;
// Host memory -> thrust::host_vector
// Device memory -> thrust::device_vector
template <typename T>
using vector_t = std::conditional_t<location == MemoryLocation::HOST, thrust::host_vector<T>,
thrust::device_vector<T>>;

vector_t<FMB> fmbs_;
vector_t<float> log_weights_;
size_t size_;

public:
__host__ FMBScene(size_t size);

__host__ ~FMBScene();

CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator[](const uint32_t i) {
return cuda::std::tie(fmbs_[i], log_weights_[i]);
__host__ FMBScene(size_t size) : size_{size}, fmbs_(size), log_weights_(size) {};

// Construct a scene by copying the contents of two std::vectors.
// This enables easy construction from the Python side.
//
// fmbs        -- the FMBs to store
// log_weights -- one log weight per FMB, in the same order
//
// Throws std::invalid_argument when the two vectors differ in length. The
// check runs before any element is copied, so a mismatched call fails without
// first allocating storage or transferring data into the scene's vectors.
__host__ FMBScene(const std::vector<FMB>& fmbs, const std::vector<float>& log_weights)
    : fmbs_(), log_weights_(), size_{fmbs.size()} {
    if (fmbs.size() != log_weights.size()) {
        throw std::invalid_argument(
            "FMBScene constructor: fmbs and log_weights must have the same size");
    }
    fmbs_.assign(fmbs.begin(), fmbs.end());
    log_weights_.assign(log_weights.begin(), log_weights.end());
}

CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator[](const uint32_t i) const {
return cuda::std::tie(fmbs_[i], log_weights_[i]);
CUDA_CALLABLE auto operator[](const uint32_t i) {
return cuda::std::make_tuple(fmbs_[i], log_weights_[i]);
}

class Iterator {
private:
FMB* fmb_ptr_;
float* log_weight_ptr_;

public:
CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr)
: fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {}
CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator*() {
return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_);
}
CUDA_CALLABLE bool operator!=(const Iterator& other) const {
return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_;
}
CUDA_CALLABLE Iterator& operator++() {
fmb_ptr_++, log_weight_ptr_++;
return *this;
}
};

class ConstIterator {
private:
const FMB* fmb_ptr_;
const float* log_weight_ptr_;

public:
CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr)
: fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {}
CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator*() const {
return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_);
}
CUDA_CALLABLE bool operator!=(const ConstIterator& other) const {
return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_;
}
CUDA_CALLABLE ConstIterator& operator++() {
fmb_ptr_++, log_weight_ptr_++;
return *this;
}
};
CUDA_CALLABLE auto operator[](const uint32_t i) const {
return cuda::std::make_tuple(fmbs_[i], log_weights_[i]);
}

CUDA_CALLABLE Iterator begin() {
return Iterator(fmbs_, log_weights_);
CUDA_CALLABLE auto begin() {
return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin());
}
CUDA_CALLABLE Iterator end() {
return Iterator(fmbs_ + size_, log_weights_ + size_);
CUDA_CALLABLE auto end() {
return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end());
}
CUDA_CALLABLE ConstIterator begin() const {
return ConstIterator(fmbs_, log_weights_);
CUDA_CALLABLE auto begin() const {
return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin());
}
CUDA_CALLABLE ConstIterator end() const {
return ConstIterator(fmbs_ + size_, log_weights_ + size_);
CUDA_CALLABLE auto end() const {
return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end());
}
CUDA_CALLABLE const FMB& get_fmb(uint32_t idx) const {
return fmbs_[idx];
Expand Down
18 changes: 17 additions & 1 deletion genmetaballs/src/genmetaballs/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
TwoParameterConfidence,
ZeroParameterConfidence,
)
from genmetaballs._genmetaballs_bindings.fmb import CPUFMBScene, GPUFMBScene
from genmetaballs._genmetaballs_bindings.fmb import FMB, CPUFMBScene, GPUFMBScene
from genmetaballs._genmetaballs_bindings.image import CPUImage, GPUImage
from genmetaballs._genmetaballs_bindings.utils import CPUFloatArray2D, GPUFloatArray2D, sigmoid

Expand Down Expand Up @@ -63,6 +63,19 @@ def make_fmb_scene(size: int, device: DeviceType) -> CPUFMBScene | GPUFMBScene:
raise ValueError(f"Unsupported device type: {device}")


# TODO: create a wrapper class for FMBScene and turn the factory functions into
# class methods
def make_fmb_scene_from_values(
    fmbs: list[fmb.FMB], log_weights: list[float], device: DeviceType
) -> CPUFMBScene | GPUFMBScene:
    """Build an FMB scene on the requested device from existing values.

    Args:
        fmbs: The FMBs to place in the scene.
        log_weights: The log weights handed to the scene constructor alongside ``fmbs``.
        device: ``"cpu"`` selects ``CPUFMBScene``; ``"gpu"`` selects ``GPUFMBScene``.

    Returns:
        The scene object constructed from ``fmbs`` and ``log_weights``.

    Raises:
        ValueError: If ``device`` is neither ``"cpu"`` nor ``"gpu"``.
    """
    # Reject unknown devices up front so there is a single construction site below.
    if device not in ("cpu", "gpu"):
        raise ValueError(f"Unsupported device type: {device}")

    scene_cls = CPUFMBScene if device == "cpu" else GPUFMBScene
    return scene_cls(fmbs, log_weights)


__all__ = [
"array2d_float",
"ZeroParameterConfidence",
Expand All @@ -74,7 +87,10 @@ def make_fmb_scene(size: int, device: DeviceType) -> CPUFMBScene | GPUFMBScene:
"intersector",
"sigmoid",
"FourParameterBlender",
"FMB",
"Intrinsics",
"ThreeParameterBlender",
"make_image",
"make_fmb_scene",
"make_fmb_scene_from_values",
]
32 changes: 31 additions & 1 deletion tests/python_tests/test_fmb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from scipy.spatial.distance import mahalanobis
from scipy.spatial.transform import Rotation as Rot

from genmetaballs.core import fmb, geometry, make_fmb_scene
from genmetaballs.core import fmb, geometry, make_fmb_scene, make_fmb_scene_from_values

FMB = fmb.FMB
Pose, Vec3D, Rotation = geometry.Pose, geometry.Vec3D, geometry.Rotation
Expand Down Expand Up @@ -48,3 +48,33 @@ def test_fmb_scene_creation():
gpu_scene = make_fmb_scene(20, device="gpu")
assert isinstance(gpu_scene, fmb.GPUFMBScene)
assert len(gpu_scene) == 20


@pytest.mark.parametrize("device", ["cpu", "gpu"])
def test_fmb_scene_creation_from_lists(rng, device):
    """A scene built from Python lists returns the same FMBs and log weights by index."""
    num_balls = 15
    fmbs, log_weights = [], []
    gt_translations, gt_extents = [], []

    # Draw order (quaternion, then translation/extent, then log weight) is kept
    # fixed so a seeded ``rng`` yields the same values on every run.
    for _ in range(num_balls):
        quat = rng.uniform(size=4).astype(np.float32)
        tran, extent = rng.uniform(size=(2, 3)).astype(np.float32)
        rotation = Rotation.from_quat(*quat)
        pose = Pose.from_components(rotation, Vec3D(*tran))
        fmbs.append(FMB(pose, *extent))
        log_weights.append(rng.uniform())
        gt_translations.append(tran)
        gt_extents.append(extent)

    scene = make_fmb_scene_from_values(fmbs, log_weights, device=device)

    assert len(scene) == num_balls
    # Every stored (FMB, log_weight) pair must match what was passed in.
    expected = zip(gt_translations, gt_extents, log_weights)
    for i, (expected_tran, expected_extent, expected_log_weight) in enumerate(expected):
        fmb_i, log_weight = scene[i]

        translation = fmb_i.pose.tran
        actual_tran = [translation.x, translation.y, translation.z]
        assert np.allclose(actual_tran, expected_tran)

        assert np.allclose(fmb_i.extent, expected_extent)
        assert np.isclose(log_weight, expected_log_weight)