From 010a04d96fac9f995481e69ca5dbfa1f3a8bea0f Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Thu, 8 Jun 2023 07:23:41 -0400 Subject: [PATCH] Revert "Synced llama.cpp.cmake with upstream (#887)" This reverts commit 89910c7ca87435b9f636060c0f859cd236c9923e. --- gpt4all-backend/llama.cpp-mainline | 2 +- gpt4all-backend/llama.cpp.cmake | 226 ++++++++++------------------- 2 files changed, 79 insertions(+), 149 deletions(-) diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index 5b57a5b7..ecb217db 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit 5b57a5b72676540b6a45a3f527126299969ad241 +Subproject commit ecb217db4fcfa3880300ad08531a5fb6bb142d45 diff --git a/gpt4all-backend/llama.cpp.cmake b/gpt4all-backend/llama.cpp.cmake index 01ded39d..c7e59eb3 100644 --- a/gpt4all-backend/llama.cpp.cmake +++ b/gpt4all-backend/llama.cpp.cmake @@ -65,12 +65,8 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" # 3rd party libs option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) -#option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) -#option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -#option(LLAMA_METAL "llama: use Metal" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") +option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) # # Compile flags @@ -214,22 +210,86 @@ endif() function(include_ggml DIRECTORY SUFFIX WITH_LLAMA) message(STATUS "Configuring ggml implementation target llama${SUFFIX} in ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}") + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + message(STATUS "ARM detected") + if (MSVC) + # TODO: arm msvc? + else() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + add_compile_options(-mcpu=native) + endif() + # TODO: armv6,7,8 version specific flags + endif() + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") + message(STATUS "x86 detected") + if (MSVC) + if (LLAMA_AVX512) + add_compile_options($<$:/arch:AVX512>) + add_compile_options($<$:/arch:AVX512>) + # MSVC has no compile-time flags enabling specific + # AVX512 extensions, neither it defines the + # macros corresponding to the extensions. + # Do it manually. + if (LLAMA_AVX512_VBMI) + add_compile_definitions($<$:__AVX512VBMI__>) + add_compile_definitions($<$:__AVX512VBMI__>) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_definitions($<$:__AVX512VNNI__>) + add_compile_definitions($<$:__AVX512VNNI__>) + endif() + elseif (LLAMA_AVX2) + add_compile_options($<$:/arch:AVX2>) + add_compile_options($<$:/arch:AVX2>) + elseif (LLAMA_AVX) + add_compile_options($<$:/arch:AVX>) + add_compile_options($<$:/arch:AVX>) + endif() + else() + if (LLAMA_F16C) + add_compile_options(-mf16c) + endif() + if (LLAMA_FMA) + add_compile_options(-mfma) + endif() + if (LLAMA_AVX) + add_compile_options(-mavx) + endif() + if (LLAMA_AVX2) + add_compile_options(-mavx2) + endif() + if (LLAMA_AVX512) + add_compile_options(-mavx512f) + add_compile_options(-mavx512bw) + endif() + if (LLAMA_AVX512_VBMI) + add_compile_options(-mavx512vbmi) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_options(-mavx512vnni) + endif() + endif() + else() + # TODO: support PowerPC + message(STATUS "Unknown architecture") + endif() + # # Build libraries # - set(GGML_CUBLAS_USE NO) - if (LLAMA_CUBLAS) + if (LLAMA_CUBLAS AND EXISTS ${DIRECTORY}/ggml-cuda.h) cmake_minimum_required(VERSION 3.17) find_package(CUDAToolkit) if (CUDAToolkit_FOUND) - set(GGML_CUBLAS_USE YES) message(STATUS "cuBLAS found") enable_language(CUDA) - set(GGML_SOURCES_CUDA ${DIRECTORY}/ggml-cuda.cu ${DIRECTORY}/ggml-cuda.h) + set(GGML_CUDA_SOURCES ${DIRECTORY}/ggml-cuda.cu ${DIRECTORY}/ggml-cuda.h) + + add_compile_definitions(GGML_USE_CUBLAS) if (LLAMA_STATIC) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) @@ -242,19 +302,14 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA) endif() endif() - set(GGML_CLBLAST_USE NO) - if (LLAMA_CLBLAST) + if (LLAMA_CLBLAST AND EXISTS ${DIRECTORY}/ggml-opencl.h) find_package(CLBlast) if (CLBlast_FOUND) - set(GGML_CLBLAST_USE YES) message(STATUS "CLBlast found") - set(GGML_OPENCL_SOURCE_FILE ggml-opencl.cpp) - if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/${GGML_OPENCL_SOURCE_FILE}) - set(GGML_OPENCL_SOURCE_FILE ggml-opencl.c) - endif() + set(GGML_OPENCL_SOURCES ${DIRECTORY}/ggml-opencl.c ${DIRECTORY}/ggml-opencl.h) - set(GGML_OPENCL_SOURCES ${DIRECTORY}/${GGML_OPENCL_SOURCE_FILE} ${DIRECTORY}/ggml-opencl.h) + add_compile_definitions(GGML_USE_CLBLAST) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast) else() @@ -262,22 +317,15 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA) endif() endif() - set(GGML_SOURCES_QUANT_K ) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/ggml-quants-k.h) - set(GGML_SOURCES_QUANT_K - ${DIRECTORY}/ggml-quants-k.h - ${DIRECTORY}/ggml-quants-k.c) - endif() - add_library(ggml${SUFFIX} OBJECT ${DIRECTORY}/ggml.c ${DIRECTORY}/ggml.h - ${GGML_SOURCES_QUANT_K} - ${GGML_SOURCES_CUDA} + ${GGML_CUDA_SOURCES} ${GGML_OPENCL_SOURCES}) target_include_directories(ggml${SUFFIX} PUBLIC ${DIRECTORY}) target_compile_features(ggml${SUFFIX} PUBLIC c_std_11) # don't bump + target_link_libraries(ggml${SUFFIX} PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) if (BUILD_SHARED_LIBS) set_target_properties(ggml${SUFFIX} PROPERTIES POSITION_INDEPENDENT_CODE ON) @@ -290,13 +338,14 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA) set(LLAMA_UTIL_SOURCE_FILE llama_util.h) endif() - add_library(llama${SUFFIX} + add_library(llama${SUFFIX} STATIC ${DIRECTORY}/llama.cpp ${DIRECTORY}/llama.h ${DIRECTORY}/${LLAMA_UTIL_SOURCE_FILE}) target_include_directories(llama${SUFFIX} PUBLIC ${DIRECTORY}) target_compile_features(llama${SUFFIX} PUBLIC cxx_std_11) # don't bump + target_link_libraries(llama${SUFFIX} PRIVATE ggml${SUFFIX} ${LLAMA_EXTRA_LIBS}) if (BUILD_SHARED_LIBS) set_target_properties(llama${SUFFIX} PROPERTIES POSITION_INDEPENDENT_CODE ON) @@ -304,7 +353,7 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA) endif() endif() - if (GGML_SOURCES_CUDA) + if (GGML_CUDA_SOURCES) message(STATUS "GGML CUDA sources found, configuring CUDA architecture") set_property(TARGET ggml${SUFFIX} PROPERTY CUDA_ARCHITECTURES OFF) set_property(TARGET ggml${SUFFIX} PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") @@ -312,123 +361,4 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA) set_property(TARGET llama${SUFFIX} PROPERTY CUDA_ARCHITECTURES OFF) endif() endif() - - if (GGML_CUBLAS_USE) - target_compile_definitions(ggml${SUFFIX} PRIVATE - GGML_USE_CUBLAS - GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} - GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y}) - if (WITH_LLAMA) - target_compile_definitions(llama${SUFFIX} PRIVATE - GGML_USE_CUBLAS - GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} - GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y}) - endif() - endif() - if (GGML_CLBLAST_USE) - if (WITH_LLAMA) - target_compile_definitions(llama${SUFFIX} PRIVATE GGML_USE_CLBLAST) - endif() - target_compile_definitions(ggml${SUFFIX} PRIVATE GGML_USE_CLBLAST) - endif() - - if (LLAMA_METAL) - find_library(FOUNDATION_LIBRARY Foundation REQUIRED) - find_library(METAL_FRAMEWORK Metal REQUIRED) - find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) - find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED) - - set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h) - - target_compile_definitions(llama${SUFFIX} PRIVATE - GGML_USE_METAL - GGML_METAL_NDEBUG) - - # get full path to the file - #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/") - - # copy ggml-metal.metal to bin directory - configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) - - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} - ${FOUNDATION_LIBRARY} - ${METAL_FRAMEWORK} - ${METALKIT_FRAMEWORK} - ${METALPERFORMANCE_FRAMEWORK} - ) - endif() - - if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") - message(STATUS "ARM detected") - if (MSVC) - # TODO: arm msvc? - else() - if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") - target_compile_options(ggml${SUFFIX} PRIVATE -mcpu=native) - endif() - # TODO: armv6,7,8 version specific flags - endif() - elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") - message(STATUS "x86 detected") - if (MSVC) - if (LLAMA_AVX512) - target_compile_definitions(ggml${SUFFIX} PRIVATE - $<$:/arch:AVX512> - $<$:/arch:AVX512>) - # MSVC has no compile-time flags enabling specific - # AVX512 extensions, neither it defines the - # macros corresponding to the extensions. - # Do it manually. - if (LLAMA_AVX512_VBMI) - target_compile_definitions(ggml${SUFFIX} PRIVATE - $<$:__AVX512VBMI__> - $<$:__AVX512VBMI__>) - endif() - if (LLAMA_AVX512_VNNI) - target_compile_definitions(ggml${SUFFIX} PRIVATE - $<$:__AVX512VNNI__> - $<$:__AVX512VNNI__>) - endif() - elseif (LLAMA_AVX2) - target_compile_definitions(ggml${SUFFIX} PRIVATE - $<$:/arch:AVX2> - $<$:/arch:AVX2>) - elseif (LLAMA_AVX) - target_compile_definitions(ggml${SUFFIX} PRIVATE - $<$:/arch:AVX> - $<$:/arch:AVX>) - endif() - else() - if (LLAMA_F16C) - target_compile_options(ggml${SUFFIX} PRIVATE -mf16c) - endif() - if (LLAMA_FMA) - target_compile_options(ggml${SUFFIX} PRIVATE -mfma) - endif() - if (LLAMA_AVX) - target_compile_options(ggml${SUFFIX} PRIVATE -mavx) - endif() - if (LLAMA_AVX2) - target_compile_options(ggml${SUFFIX} PRIVATE -mavx2) - endif() - if (LLAMA_AVX512) - target_compile_options(ggml${SUFFIX} PRIVATE -mavx512f) - target_compile_options(ggml${SUFFIX} PRIVATE -mavx512bw) - endif() - if (LLAMA_AVX512_VBMI) - target_compile_options(ggml${SUFFIX} PRIVATE -mavx512vbmi) - endif() - if (LLAMA_AVX512_VNNI) - target_compile_options(ggml${SUFFIX} PRIVATE -mavx512vnni) - endif() - endif() - else() - # TODO: support PowerPC - message(STATUS "Unknown architecture") - endif() - - target_link_libraries(ggml${SUFFIX} PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) - if (WITH_LLAMA) - target_link_libraries(llama${SUFFIX} PRIVATE ggml${SUFFIX} ${LLAMA_EXTRA_LIBS}) - endif() endfunction()