mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-08 07:10:32 +00:00
backend(build): set CUDA arch defaults before enable_language(CUDA) (#2855)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
971c83d1d3
commit
7463b2170b
@ -63,6 +63,24 @@ if (LLMODEL_VULKAN)
|
||||
list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
|
||||
endif()
|
||||
if (LLMODEL_CUDA)
|
||||
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
|
||||
|
||||
# Defaults must be set before enable_language(CUDA).
|
||||
# Keep this in sync with the arch list in ggml/src/CMakeLists.txt.
|
||||
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
||||
# 52 == lowest CUDA 12 standard
|
||||
# 60 == f16 CUDA intrinsics
|
||||
# 61 == integer CUDA intrinsics
|
||||
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
||||
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
|
||||
include(CheckLanguage)
|
||||
check_language(CUDA)
|
||||
if (NOT CMAKE_CUDA_COMPILER)
|
||||
|
@ -378,19 +378,7 @@ function(include_ggml SUFFIX)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
|
||||
|
||||
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
||||
# 52 == lowest CUDA 12 standard
|
||||
# 60 == f16 CUDA intrinsics
|
||||
# 61 == integer CUDA intrinsics
|
||||
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
||||
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
# architectures are set in gpt4all-backend/CMakeLists.txt
|
||||
|
||||
set(GGML_HEADERS_CUDA ${DIRECTORY}/ggml/include/ggml-cuda.h)
|
||||
file(GLOB GGML_HEADERS_CUDA "${DIRECTORY}/ggml/src/ggml-cuda/*.cuh")
|
||||
|
@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
### Fixed
|
||||
- Do not initialize Vulkan driver when only using CPU ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
|
||||
- Fix a potential crash on exit when using only CPU on Linux with NVIDIA (does not affect X11) ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
|
||||
- Fix default CUDA architecture list after [#2802](https://github.com/nomic-ai/gpt4all/pull/2802) ([#2855](https://github.com/nomic-ai/gpt4all/pull/2855))
|
||||
|
||||
## [3.2.0] - 2024-08-12
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user