diff --git a/CMakeLists.txt b/CMakeLists.txt
index 75fd67edd..c0040a4ad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,17 +50,25 @@ project(k2 ${languages})
 set(K2_VERSION "1.23.4")
 
 # ----------------- Supported build types for K2 project -----------------
+message(STATUS "The generator is ${CMAKE_GENERATOR}")
+message(STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
+message(STATUS "CMAKE_CONFIGURATION_TYPES=${CMAKE_CONFIGURATION_TYPES}")
 set(K2_ALLOWABLE_BUILD_TYPES Debug Release RelWithDebInfo MinSizeRel)
 set(K2_DEFAULT_BUILD_TYPE "Release")
-set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${K2_ALLOWABLE_BUILD_TYPES}")
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   # CMAKE_CONFIGURATION_TYPES: with config type values from other generators (IDE).
   message(STATUS "No CMAKE_BUILD_TYPE given, default to ${K2_DEFAULT_BUILD_TYPE}")
-  set(CMAKE_BUILD_TYPE "${K2_DEFAULT_BUILD_TYPE}")
+  set(CMAKE_BUILD_TYPE "${K2_DEFAULT_BUILD_TYPE}" CACHE STRING "" FORCE)
+elseif(NOT CMAKE_BUILD_TYPE AND CMAKE_CONFIGURATION_TYPES)
+  # Multi-config generators (e.g. Visual Studio, Xcode) do not set CMAKE_BUILD_TYPE.
+  message(STATUS "No CMAKE_BUILD_TYPE given, default to ${K2_DEFAULT_BUILD_TYPE}")
+  set(CMAKE_BUILD_TYPE "${K2_DEFAULT_BUILD_TYPE}" CACHE STRING "" FORCE)
 elseif(NOT CMAKE_BUILD_TYPE IN_LIST K2_ALLOWABLE_BUILD_TYPES)
   message(FATAL_ERROR "Invalid build type: ${CMAKE_BUILD_TYPE}, \
     choose one from ${K2_ALLOWABLE_BUILD_TYPES}")
 endif()
+# Must follow the defaulting above: set_property(CACHE) needs an existing cache entry.
+set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${K2_ALLOWABLE_BUILD_TYPES}")
 
 string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPERCASE)
 if("${CMAKE_BUILD_TYPE_UPPERCASE}" STREQUAL "DEBUG")
@@ -72,11 +80,11 @@ endif()
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 option(BUILD_SHARED_LIBS "Whether to build shared or static lib" ON)
-option(K2_USE_PYTORCH "Whether to build with PyTorch" ON)
-option(K2_ENABLE_BENCHMARK "Whether to enable benchmark" ON)
+option(K2_USE_PYTORCH "Whether to build with PyTorch" OFF)
+option(K2_ENABLE_BENCHMARK "Whether to enable benchmark" OFF)
 option(K2_WITH_CUDA "Whether to build k2 with CUDA" ${_K2_WITH_CUDA})
-option(K2_ENABLE_NVTX "Whether to build k2 with the NVTX library" ON)
-option(K2_ENABLE_TESTS "Whether to build tests" ON)
+option(K2_ENABLE_NVTX "Whether to build k2 with the NVTX library" OFF)
+option(K2_ENABLE_TESTS "Whether to build tests" OFF)
 
 # You have to enable this option if you will run k2 on a machine different from
 # the one you used to build k2 and the two machines have different types of GPUs
@@ -91,9 +99,20 @@ if(NOT K2_WITH_CUDA)
   set(K2_ENABLE_NVTX OFF CACHE BOOL "" FORCE)
 endif()
 
+if(K2_WITH_CUDA)
+  if(CMAKE_VERSION VERSION_LESS "3.18.0")
+    find_package(CUDA QUIET)
+    message(STATUS "CUDA_INCLUDE_DIRS=${CUDA_INCLUDE_DIRS}")
+    message(STATUS "CUDA_LIBRARIES=${CUDA_LIBRARIES}")
+    message(STATUS "CUDA_nvToolsExt_LIBRARY=${CUDA_nvToolsExt_LIBRARY}")
+    message(STATUS "CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}")
+  endif()
+endif()
+
 if(NOT K2_USE_PYTORCH)
-  message(FATAL_ERROR "\
-    Please set K2_USE_PYTORCH to ON.
+  message(WARNING "\
+    K2_USE_PYTORCH is OFF, only the k2 core lib will be built.
+    If you want to use k2 with PyTorch, please turn it ON.
     Support for other frameworks will be added later")
 endif()
 
@@ -101,11 +120,12 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
 
-if(WIN32 AND BUILD_SHARED_LIBS)
-  message(STATUS "Set BUILD_SHARED_LIBS to OFF for Windows")
-  set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
+if(WIN32)
+  # Export all symbols from DLLs so that shared-library builds work on Windows.
+  # https://cmake.org/cmake/help/v3.4/prop_tgt/WINDOWS_EXPORT_ALL_SYMBOLS.html#prop_tgt:WINDOWS_EXPORT_ALL_SYMBOLS
+  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON CACHE BOOL "" FORCE)
 endif()
 
 set(CMAKE_SKIP_BUILD_RPATH FALSE)
 set(BUILD_RPATH_USE_ORIGIN TRUE)
 set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
@@ -415,7 +435,9 @@ install(FILES
   DESTINATION share/cmake/k2
 )
 
-install(FILES
-  ${PROJECT_SOURCE_DIR}/k2/python/k2/torch_version.py
-  DESTINATION ./
-)
+if(K2_USE_PYTORCH)
+  install(FILES
+    ${PROJECT_SOURCE_DIR}/k2/python/k2/torch_version.py
+    DESTINATION ./
+  )
+endif()
diff --git a/k2/CMakeLists.txt b/k2/CMakeLists.txt
index 6a7839d0e..ff7fecb95 100644
--- a/k2/CMakeLists.txt
+++ b/k2/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_subdirectory(csrc)
-add_subdirectory(python)
+if(K2_USE_PYTORCH)
+  add_subdirectory(python)
+endif()
 
 if(K2_USE_PYTORCH)
   # We use K2_TORCH_VERSION instead of TORCH_VERSION
diff --git a/k2/csrc/CMakeLists.txt b/k2/csrc/CMakeLists.txt
index 736668e9b..a371138bb 100644
--- a/k2/csrc/CMakeLists.txt
+++ b/k2/csrc/CMakeLists.txt
@@ -38,6 +38,8 @@ add_library(k2_nvtx INTERFACE)
 target_include_directories(k2_nvtx INTERFACE ${CMAKE_SOURCE_DIR})
 if(K2_ENABLE_NVTX)
   target_compile_definitions(k2_nvtx INTERFACE K2_ENABLE_NVTX=1)
+  target_include_directories(k2_nvtx INTERFACE ${CUDA_INCLUDE_DIRS})
+  target_link_libraries(k2_nvtx INTERFACE ${CUDA_nvToolsExt_LIBRARY})
   if(WIN32)
     target_include_directories(k2_nvtx INTERFACE
       ${CUDA_TOOLKIT_ROOT_DIR}/include/nvtx3
@@ -79,14 +81,13 @@ set(context_srcs
   thread_pool.cu
   timer.cu
   top_sort.cu
-  torch_util.cu
   utils.cu
   nbest.cu
 )
 
 
 if(K2_USE_PYTORCH)
-  list(APPEND context_srcs pytorch_context.cu)
+  list(APPEND context_srcs pytorch_context.cu torch_util.cu)
 else()
   list(APPEND context_srcs default_context.cu)
 endif()
diff --git a/k2/csrc/default_context.cu b/k2/csrc/default_context.cu
index d606a0d94..1a1e17e51 100644
--- a/k2/csrc/default_context.cu
+++ b/k2/csrc/default_context.cu
@@ -18,11 +18,18 @@
  */
 
 #include <cstdlib>
+#include <cstring>
 #include <mutex>  // NOLINT
 
 #include "k2/csrc/context.h"
+#include "k2/csrc/device_guard.h"
 #include "k2/csrc/log.h"
 #include "k2/csrc/nvtx.h"
+
+#ifdef _WIN32
+// The Windows CRT has no posix_memalign(); emulate it with _aligned_malloc().
+#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
+#endif  // _WIN32
 
 namespace k2 {
 
@@ -32,7 +39,7 @@ static constexpr std::size_t kAlignment = 64;
 class CpuContext : public Context {
  public:
   CpuContext() = default;
-  ContextPtr GetCpuContext() override { return shared_from_this(); }
+  ContextPtr GetCpuContext() { return shared_from_this(); }
   DeviceType GetDeviceType() const override { return kCpu; }
 
   void *Allocate(std::size_t bytes, void **deleter_context) override {
@@ -52,6 +59,33 @@ class CpuContext : public Context {
   void Deallocate(void *data, void * /*deleter_context*/) override {
-    free(data);
+#ifdef _WIN32
+    _aligned_free(data);  // pairs with _aligned_malloc() in the shim above
+#else
+    free(data);
+#endif  // _WIN32
   }
+
+  // Copies num_bytes from host memory `src` to `dst` owned by dst_context.
+  void CopyDataTo(size_t num_bytes, const void *src, ContextPtr dst_context,
+                  void *dst) override {
+    DeviceType device_type = dst_context->GetDeviceType();
+    switch (device_type) {
+      case kCpu:
+        memcpy(dst, src, num_bytes);
+        break;
+      case kCuda: {
+        // CPU -> CUDA
+        DeviceGuard guard(dst_context);
+        ContextPtr pinned_context = GetPinnedContext();
+        auto region = NewRegion(pinned_context, num_bytes);
+        memcpy(region->data, src, num_bytes);
+        pinned_context->CopyDataTo(num_bytes, region->data, dst_context, dst);
+        break;
+      }
+      default:
+        K2_LOG(FATAL) << "Unsupported device type: " << device_type;
+        break;
+    }
+  }
 };
 
 class CudaContext : public Context {
@@ -66,7 +100,7 @@ class CudaContext : public Context {
     auto ret = cudaStreamCreate(&stream_);
     K2_CHECK_CUDA_ERROR(ret);
   }
-  ContextPtr GetCpuContext() override { return k2::GetCpuContext(); }
+  ContextPtr GetCpuContext() { return k2::GetCpuContext(); }
   DeviceType GetDeviceType() const override { return kCuda; }
   int32_t GetDeviceId() const override { return gpu_id_; }
 
@@ -98,6 +132,30 @@ class CudaContext : public Context {
     K2_CHECK_CUDA_ERROR(ret);
   }
 
+  // Copies num_bytes from device memory `src` to `dst` owned by dst_context.
+  void CopyDataTo(size_t num_bytes, const void *src, ContextPtr dst_context,
+                  void *dst) override {
+    DeviceType device_type = dst_context->GetDeviceType();
+    switch (device_type) {
+      case kCpu: {
+        cudaError_t ret =
+            cudaMemcpy(dst, src, num_bytes, cudaMemcpyDeviceToHost);
+        K2_CHECK_CUDA_ERROR(ret);
+        break;
+      }
+      case kCuda: {
+        cudaError_t ret =
+            cudaMemcpyAsync(dst, src, num_bytes, cudaMemcpyDeviceToDevice,
+                            dst_context->GetCudaStream());
+        K2_CHECK_CUDA_ERROR(ret);
+        break;
+      }
+      default:
+        K2_LOG(FATAL) << "Unsupported device type: " << device_type;
+        break;
+    }
+  }
+
   ~CudaContext() {
     auto ret = cudaStreamDestroy(stream_);
     K2_CHECK_CUDA_ERROR(ret);
diff --git a/k2/csrc/fake_cuda.h b/k2/csrc/fake_cuda.h
index 29915248e..a35a8d370 100644
--- a/k2/csrc/fake_cuda.h
+++ b/k2/csrc/fake_cuda.h
@@ -92,6 +92,11 @@ enum FakedEnum {
 
 using cudaMemcpyKind = FakedEnum;
 
+inline cudaError_t cudaSetDevice(int device) {
+  K2_NIY;
+  return 0;
+}
+
 inline const char *cudaGetErrorString(cudaError_t error) {
   K2_NIY;
   return nullptr;
@@ -186,6 +191,16 @@ inline cudaError_t cudaMallocHost(void **ptr, size_t size) {
   return 0;
 }
 
+inline cudaError_t cudaMalloc(void **devPtr, size_t size) {
+  K2_NIY;
+  return 0;
+}
+
+inline cudaError_t cudaFree(void *devPtr) {
+  K2_NIY;
+  return 0;
+}
+
 }  // namespace k2
 
 namespace cub {