cmake: fix CMAKE_CUDA_ARCHITECTURES default (#2421)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel 2024-06-26 14:48:18 -04:00 committed by GitHub
parent 3a61070f82
commit da1823ed7a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 16 deletions

View File

@ -65,6 +65,10 @@ if (LLMODEL_VULKAN)
list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
endif()
if (LLMODEL_CUDA)
# Copy an already-defined CMAKE_CUDA_ARCHITECTURES into GGML_CUDA_ARCHITECTURES.
# When CMAKE_CUDA_ARCHITECTURES is not defined, GGML_CUDA_ARCHITECTURES is left
# untouched here.
# NOTE(review): presumably this must run before the CUDA language is enabled,
# because enabling CUDA initializes CMAKE_CUDA_ARCHITECTURES to a compiler
# default and a later DEFINED check could no longer tell a user-provided value
# from that default -- confirm against where CUDA is enabled.
if (DEFINED CMAKE_CUDA_ARCHITECTURES)
set(GGML_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
endif()
include(CheckLanguage)
check_language(CUDA)
if (NOT CMAKE_CUDA_COMPILER)

View File

@ -371,6 +371,20 @@ function(include_ggml SUFFIX)
find_package(CUDAToolkit REQUIRED)
set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
# Choose a default CUDA architecture list only when GGML_CUDA_ARCHITECTURES has
# not been defined already; an existing value is used as-is.
# NOTE(review): the existing value is presumably inherited from the scope that
# calls this function -- confirm against the call site.
if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
# Meaning of the compute capability values used in the defaults below:
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
# Either f16 option is enabled: use 60 as the lowest entry instead of 52.
set(GGML_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
else()
set(GGML_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
#set(GGML_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif()
endif()
# Report the list that will be used (inherited or defaulted) at configure time.
message(STATUS "Using CUDA architectures: ${GGML_CUDA_ARCHITECTURES}")
set(GGML_HEADERS_CUDA ${DIRECTORY}/ggml-cuda.h)
file(GLOB GGML_SOURCES_CUDA "${DIRECTORY}/ggml-cuda/*.cu")
@ -406,22 +420,6 @@ function(include_ggml SUFFIX)
endif()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver)
if (DEFINED CMAKE_CUDA_ARCHITECTURES)
set(GGML_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
else()
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
set(GGML_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
else()
set(GGML_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
#set(GGML_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work
endif()
endif()
message(STATUS "Using CUDA architectures: ${GGML_CUDA_ARCHITECTURES}")
endif()
if (LLAMA_CLBLAST)