From f79557d2aa15390aae7785ebdfe8f987170bce8f Mon Sep 17 00:00:00 2001 From: Aaron Miller Date: Mon, 16 Oct 2023 10:03:57 -0700 Subject: [PATCH] speedup: just use mat*vec shaders for mat*mat So far, my from-scratch mat*mat shaders are still slower than just running more invocations of the existing Metal-ported mat*vec shaders. It should be theoretically possible to make a mat*mat that's faster (for actual mat*mat cases) than an optimal mat*vec, but it will need to be at *least* as fast as the mat*vec op and then take special care to be cache-friendly and save memory bandwidth, as the number of compute ops is the same. --- gpt4all-backend/llama.cpp-mainline | 2 +- gpt4all-backend/llama.cpp.cmake | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index 500689ad..81c24d7b 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit 500689ad356a81a471a7fb68cc70f7aee5a5f56e +Subproject commit 81c24d7b7df0d3564c8563bb769bd0302588fe1f diff --git a/gpt4all-backend/llama.cpp.cmake b/gpt4all-backend/llama.cpp.cmake index ada5b16b..3aa2ec45 100644 --- a/gpt4all-backend/llama.cpp.cmake +++ b/gpt4all-backend/llama.cpp.cmake @@ -239,11 +239,6 @@ if (LLAMA_KOMPUTE) kompute/op_rmsnorm.comp kompute/op_diagmask.comp kompute/op_mul_mat_mat_f32.comp - kompute/op_mul_mat_mat_f16.comp - kompute/op_mul_mat_mat_q8_0.comp - kompute/op_mul_mat_mat_q4_0.comp - kompute/op_mul_mat_mat_q4_1.comp - kompute/op_mul_mat_mat_q6_k.comp kompute/op_mul_mat_f16.comp kompute/op_mul_mat_q8_0.comp kompute/op_mul_mat_q4_0.comp @@ -275,11 +270,6 @@ if (LLAMA_KOMPUTE) shaderop_rmsnorm.h shaderop_diagmask.h shaderop_mul_mat_mat_f32.h - shaderop_mul_mat_mat_f16.h - shaderop_mul_mat_mat_q8_0.h - shaderop_mul_mat_mat_q4_0.h - shaderop_mul_mat_mat_q4_1.h - shaderop_mul_mat_mat_q6_k.h shaderop_mul_mat_f16.h shaderop_mul_mat_q8_0.h shaderop_mul_mat_q4_0.h