Skip to content

Instantly share code, notes, and snippets.

@simonbyrne
Created August 22, 2025 00:45
Show Gist options
  • Save simonbyrne/ab772dde60df14908626e614143590cd to your computer and use it in GitHub Desktop.
Save simonbyrne/ab772dde60df14908626e614143590cd to your computer and use it in GitHub Desktop.
holoscan allocator with MatX
#include <cstddef>
#include <memory>
#include <new>
#include <stdexcept>
#include <type_traits>
#include <utility>

#include "matx.h"
#include "holoscan/holoscan.hpp"
namespace holoscan {
/**
 * @brief Holoscan-backed counterpart to MatX's raw_pointer_buffer
 *
 * Holds one allocation obtained from a holoscan::Allocator together with its
 * size in bytes. The allocation is tracked by a std::shared_ptr whose deleter
 * hands the pointer back to the same allocator, so copies of a buffer share the
 * allocation and it is freed when the last copy is destroyed.
 *
 * The lowercase accessors (data/size/capacity) exist for compatibility with
 * MatX's basic_storage wrapper.
 *
 * @tparam T Element type stored in the buffer
 */
template <typename T>
class holoscan_pointer_buffer {
 public:
  using value_type = T;
  using iterator = T*;
  using citerator = T const*;

  /**
   * @brief Construct an empty buffer (no allocator, null data, zero size)
   */
  holoscan_pointer_buffer() = default;

  /**
   * @brief Allocate @p size bytes from @p allocator and take ownership of them
   *
   * @param allocator Holoscan allocator used for both allocation and release
   * @param size Number of bytes to allocate
   * @param memory_type Memory storage type requested from the allocator
   *
   * @throws std::invalid_argument if @p allocator is null
   * @throws std::bad_alloc if the allocator returns a null pointer
   */
  holoscan_pointer_buffer(std::shared_ptr<Allocator> allocator,
                          size_t size,
                          MemoryStorageType memory_type = MemoryStorageType::kDevice)
      // `allocator` is a by-value sink parameter: move it instead of paying for
      // another atomic reference-count increment/decrement pair.
      : allocator_(std::move(allocator)), size_(size) {
    if (!allocator_) {
      throw std::invalid_argument("holoscan_pointer_buffer: allocator cannot be null");
    }
    nvidia::byte* raw_ptr = allocator_->allocate(size, memory_type);
    if (!raw_ptr) {
      throw std::bad_alloc();
    }
    ConfigureShared(reinterpret_cast<T*>(raw_ptr));
  }

  /**
   * @brief Copy constructor; the copy shares the same allocation
   */
  holoscan_pointer_buffer(const holoscan_pointer_buffer& other) = default;

  /**
   * @brief Move constructor
   *
   * Leaves @p other empty. The size is reset explicitly because a defaulted
   * move would only copy it, leaving the source claiming a non-zero size while
   * its data pointer is already null.
   */
  holoscan_pointer_buffer(holoscan_pointer_buffer&& other) noexcept
      : allocator_(std::move(other.allocator_)),
        data_(std::move(other.data_)),
        size_(other.size_) {
    other.size_ = 0;
  }

  /**
   * @brief Copy assignment; afterwards both buffers share the same allocation
   */
  holoscan_pointer_buffer& operator=(const holoscan_pointer_buffer& other) = default;

  /**
   * @brief Move assignment
   *
   * Releases this buffer's reference to its current allocation, takes over the
   * state of @p other and leaves @p other empty (null data, zero size).
   */
  holoscan_pointer_buffer& operator=(holoscan_pointer_buffer&& other) noexcept {
    if (this != &other) {
      allocator_ = std::move(other.allocator_);
      data_ = std::move(other.data_);
      size_ = other.size_;
      other.size_ = 0;
    }
    return *this;
  }

  /**
   * @brief Get raw data pointer
   */
  __MATX_INLINE__ __MATX_HOST__ T* Data() const noexcept {
    return data_.get();
  }

  /**
   * @brief Get raw data pointer (lowercase for basic_storage compatibility)
   */
  __MATX_INLINE__ __MATX_HOST__ T* data() noexcept {
    return data_.get();
  }

  /**
   * @brief Get raw data pointer (const version for basic_storage compatibility)
   */
  __MATX_INLINE__ __MATX_HOST__ const T* data() const noexcept {
    return data_.get();
  }

  /**
   * @brief Get size in elements (byte size divided by sizeof(T), rounded down)
   */
  __MATX_INLINE__ __MATX_HOST__ auto Size() const noexcept {
    return size_ / sizeof(T);
  }

  /**
   * @brief Get size in elements (lowercase for basic_storage compatibility)
   */
  __MATX_INLINE__ __MATX_HOST__ auto size() const noexcept {
    return size_ / sizeof(T);
  }

  /**
   * @brief Get capacity in elements (for basic_storage compatibility)
   *
   * The buffer never over-allocates, so this always equals size().
   */
  __MATX_INLINE__ __MATX_HOST__ auto capacity() const noexcept {
    return size_ / sizeof(T);
  }

  /**
   * @brief Get size in bytes, exactly as requested at construction
   */
  __MATX_INLINE__ __MATX_HOST__ auto Bytes() const noexcept {
    return size_;
  }

  /**
   * @brief Get iterator to beginning
   */
  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ iterator begin() const noexcept {
    return data_.get();
  }

  /**
   * @brief Get iterator to end (one past the last whole element)
   */
  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ iterator end() const noexcept {
    return data_.get() + Size();
  }

  /**
   * @brief Get const iterator to beginning
   */
  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ citerator cbegin() const noexcept {
    return data_.get();
  }

  /**
   * @brief Get const iterator to end (one past the last whole element)
   */
  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ citerator cend() const noexcept {
    return data_.get() + Size();
  }

  /**
   * @brief Get the number of buffers currently sharing this allocation
   */
  __MATX_INLINE__ __MATX_HOST__ auto use_count() const noexcept {
    return data_.use_count();
  }

  /**
   * @brief Swap the complete state (allocator, data and size) of two buffers
   */
  friend void swap(holoscan_pointer_buffer& lhs, holoscan_pointer_buffer& rhs) noexcept {
    std::swap(lhs.allocator_, rhs.allocator_);
    std::swap(lhs.data_, rhs.data_);
    std::swap(lhs.size_, rhs.size_);
  }

 private:
  std::shared_ptr<Allocator> allocator_;  ///< Allocator the memory came from
  std::shared_ptr<T> data_;               ///< Shared handle to the allocation
  size_t size_ = 0;                       ///< Allocation size in bytes

  /**
   * @brief Wrap @p ptr in the shared handle with an allocator-backed deleter
   *
   * The deleter captures its own copy of the allocator, so the allocator stays
   * alive for as long as any copy of the data handle exists, independently of
   * the allocator_ member.
   */
  void ConfigureShared(T* ptr) {
    // Always use Holoscan allocator for deallocation since we always own the memory
    data_ = std::shared_ptr<T>(ptr, [allocator = allocator_](auto p) {
      if (allocator && p) {
        allocator->free(reinterpret_cast<nvidia::byte*>(p));
      }
    });
  }
};
} // namespace holoscan
namespace matx {
/**
* @brief Create a MatX tensor using a Holoscan allocator with C array shape
*
* This creates a tensor that uses the Holoscan allocator for memory management.
* The tensor will properly clean up memory when it goes out of scope.
*
* @tparam T Element type of the tensor
* @tparam RANK Number of dimensions
* @param allocator Holoscan allocator instance
* @param shape Shape of tensor as C array
* @param memory_type Memory storage type (default: device memory)
* @param stream CUDA stream (optional, defaults to 0)
* @return New tensor allocated with the Holoscan allocator
*/
template <typename T, int RANK>
auto make_tensor(std::shared_ptr<holoscan::Allocator> allocator,
const index_t (&shape)[RANK],
holoscan::MemoryStorageType memory_type = holoscan::MemoryStorageType::kDevice,
cudaStream_t stream = 0) {
// Create descriptor for the tensor shape
DefaultDescriptor<RANK> desc{shape};
// Calculate total size needed
size_t size = static_cast<size_t>(desc.TotalSize()) * sizeof(T);
// Create Holoscan pointer buffer that handles allocation/deallocation
holoscan::holoscan_pointer_buffer<T> buffer(allocator, size, memory_type);
// Create basic_storage with the Holoscan buffer
basic_storage<holoscan::holoscan_pointer_buffer<T>> storage{std::move(buffer)};
// Create tensor with the storage
return tensor_t<T, RANK, decltype(storage), decltype(desc)>{std::move(storage), std::move(desc)};
}
/**
* @brief Create a MatX tensor using a Holoscan allocator with container shape
*
* @tparam T Element type of the tensor
* @tparam ShapeType Container type for shape (e.g., std::array, std::vector)
* @param allocator Holoscan allocator instance
* @param shape Shape of tensor as container
* @param memory_type Memory storage type (default: device memory)
* @param stream CUDA stream (optional, defaults to 0)
* @return New tensor allocated with the Holoscan allocator
*/
template <typename T, typename ShapeType,
std::enable_if_t<!std::is_array_v<std::remove_cv_t<std::remove_reference_t<ShapeType>>>, bool> = true>
auto make_tensor(std::shared_ptr<holoscan::Allocator> allocator,
ShapeType&& shape,
holoscan::MemoryStorageType memory_type = holoscan::MemoryStorageType::kDevice,
cudaStream_t stream = 0) {
constexpr int rank = static_cast<int>(std::tuple_size_v<std::remove_cv_t<std::remove_reference_t<ShapeType>>>);
DefaultDescriptor<rank> desc{std::forward<ShapeType>(shape)};
// Calculate total size needed
size_t size = static_cast<size_t>(desc.TotalSize()) * sizeof(T);
// Create Holoscan pointer buffer that handles allocation/deallocation
holoscan::holoscan_pointer_buffer<T> buffer(allocator, size, memory_type);
// Create basic_storage with the Holoscan buffer
basic_storage<holoscan::holoscan_pointer_buffer<T>> storage{std::move(buffer)};
// Create tensor with the storage
return tensor_t<T, rank, decltype(storage), decltype(desc)>{std::move(storage), std::move(desc)};
}
/**
 * @brief Re-point an existing MatX tensor at fresh Holoscan-allocated memory
 *
 * Builds a temporary tensor with the element type and rank of @p tensor via the
 * C-array make_tensor overload, then makes @p tensor a shallow view of it.
 *
 * NOTE(review): Shallow() presumably requires @p tensor to already have the
 * same storage type as the temporary (Holoscan-backed) — confirm against MatX.
 *
 * @tparam TensorType MatX tensor type
 * @param tensor Reference to tensor to populate
 * @param allocator Holoscan allocator instance
 * @param shape Shape of tensor as C array
 * @param memory_type Memory storage type (default: device memory)
 * @param stream CUDA stream, forwarded to the allocating overload
 */
template <typename TensorType,
          std::enable_if_t<is_tensor_view_v<TensorType>, bool> = true>
void make_tensor(TensorType& tensor,
                 std::shared_ptr<holoscan::Allocator> allocator,
                 const index_t (&shape)[TensorType::Rank()],
                 holoscan::MemoryStorageType memory_type = holoscan::MemoryStorageType::kDevice,
                 cudaStream_t stream = 0) {
  using element_type = typename TensorType::value_type;
  constexpr int rank = TensorType::Rank();

  // Allocate through the C-array overload, then share its storage
  auto fresh = make_tensor<element_type, rank>(allocator, shape, memory_type, stream);
  tensor.Shallow(fresh);
}
/**
* @brief Create a 0D (scalar) MatX tensor using a Holoscan allocator
*
* @tparam T Element type of the tensor
* @param allocator Holoscan allocator instance
* @param memory_type Memory storage type (default: device memory)
* @param stream CUDA stream (optional, defaults to 0)
* @return New 0D tensor allocated with the Holoscan allocator
*/
template <typename T>
auto make_tensor(std::shared_ptr<holoscan::Allocator> allocator,
holoscan::MemoryStorageType memory_type = holoscan::MemoryStorageType::kDevice,
cudaStream_t stream = 0) {
// Create a scalar tensor (0-dimensional) directly without using std::array
DefaultDescriptor<0> desc{};
// Calculate total size needed (just one element)
size_t size = sizeof(T);
// Create Holoscan pointer buffer that handles allocation/deallocation
holoscan::holoscan_pointer_buffer<T> buffer(allocator, size, memory_type);
// Create basic_storage with the Holoscan buffer
basic_storage<holoscan::holoscan_pointer_buffer<T>> storage{std::move(buffer)};
// Create tensor with the storage
return tensor_t<T, 0, decltype(storage), decltype(desc)>{std::move(storage), std::move(desc)};
}
} // namespace matx
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment