llama/compat: shrink patch to pure call-site hooks (34 -> 20 lines)
Two reductions:
1. Drop the gguf_rename_tensor forwarder from gguf.h/gguf.cpp.
The rename-in-place trick it does (calling ggml_set_name on an embedded
ggml_tensor) can be done from outside gguf.cpp via:
    char * p = const_cast<char *>(gguf_get_tensor_name(meta, id));
    strncpy(p, new_name, GGML_MAX_NAME - 1);
    p[GGML_MAX_NAME - 1] = '\0';
That pointer points into a mutable char[GGML_MAX_NAME] inside a std::vector
element; the const on the return type is API courtesy. This is defined
behavior and has no struct-layout dependency.
2. Drop the src/CMakeLists.txt hunk that added llama-ollama-compat.cpp to
the llama target. Replace with a target_sources() call in Ollama's
llama/server/CMakeLists.txt after FetchContent_MakeAvailable. Our
compat files now stay in llama/compat/ and are never copied into the
fetched _deps/ tree.
Net patch now touches 3 files, 20 lines, all pure call-site insertions:
src/llama-model-loader.cpp +8 (include + translate + 2x should_skip)
src/llama-model.cpp +4 (include + apply_tensor_transforms)
tools/mtmd/clip.cpp +8 (include + translate_clip + maybe_load)
Verified: fresh build from scratch (rm -rf build && cmake configure)
runs PATCH_COMMAND cleanly, compiles, and ollama run gemma3 still works
end-to-end for text + vision.
parent 021389f7bb, commit 61b367ec29
4 changed files with 49 additions and 60 deletions
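The first hunk below leaves PATCH_COMMAND as a single call to apply-patch.cmake; the script itself is not part of this diff. As a rough sketch (not the shipped script), an idempotent apply step could look like the following, assuming the fetched llama.cpp tree is a git checkout and git is on PATH:

    # apply-patch.cmake (sketch only; invoked as
    #   cmake -DPATCH_FILE=<patch> -P apply-patch.cmake
    # from the fetched source tree by FetchContent's PATCH_COMMAND)
    cmake_minimum_required(VERSION 3.16)

    if(NOT DEFINED PATCH_FILE)
        message(FATAL_ERROR "PATCH_FILE not set")
    endif()

    find_program(GIT_EXECUTABLE git)
    if(NOT GIT_EXECUTABLE)
        message(FATAL_ERROR "git not found; cannot apply ${PATCH_FILE}")
    endif()

    # If the patch applies cleanly in reverse, it is already in the tree:
    # succeed without touching anything, so re-configures stay green.
    execute_process(
        COMMAND ${GIT_EXECUTABLE} apply --reverse --check ${PATCH_FILE}
        RESULT_VARIABLE already_applied
        OUTPUT_QUIET ERROR_QUIET)

    if(already_applied EQUAL 0)
        message(STATUS "Patch already applied, skipping: ${PATCH_FILE}")
    else()
        execute_process(
            COMMAND ${GIT_EXECUTABLE} apply ${PATCH_FILE}
            RESULT_VARIABLE apply_result)
        if(NOT apply_result EQUAL 0)
            message(FATAL_ERROR "Failed to apply ${PATCH_FILE}")
        endif()
    endif()

Whether the real script uses git apply, patch, or direct file edits is not shown here; the property the build relies on is simply that re-running the command on an already-patched tree exits 0.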
@@ -24,21 +24,26 @@
 set(_compat_dir ${CMAKE_CURRENT_LIST_DIR})
 
 # Expose a single variable the main CMakeLists passes into FetchContent's
-# PATCH_COMMAND. Uses CMake's own `-E copy` for cross-platform file copy.
-# The patch itself is applied via a small CMake script so the step is
-# idempotent — re-configuring or rebuilding won't fail with "already applied".
+# PATCH_COMMAND. The patch is applied via a small CMake script so the step
+# is idempotent — re-configuring or rebuilding won't fail with "already
+# applied".
+#
+# The compat source files (.h, .cpp) are NOT copied into the fetched tree.
+# Instead, llama/server/CMakeLists.txt does target_sources() on the llama
+# target after FetchContent_MakeAvailable. That keeps Ollama's code in
+# Ollama's tree and makes the patch pure call-site insertions.
 set(OLLAMA_LLAMA_CPP_COMPAT_PATCH_COMMAND
-    ${CMAKE_COMMAND} -E copy
-        "${_compat_dir}/llama-ollama-compat.h"
-        "src/llama-ollama-compat.h"
-    COMMAND ${CMAKE_COMMAND} -E copy
-        "${_compat_dir}/llama-ollama-compat.cpp"
-        "src/llama-ollama-compat.cpp"
-    COMMAND ${CMAKE_COMMAND}
+    ${CMAKE_COMMAND}
         -DPATCH_FILE=${_compat_dir}/upstream-edits.patch
         -P ${_compat_dir}/apply-patch.cmake
     CACHE INTERNAL "llama.cpp compat patch command for FetchContent")
 
+# Where the compat source files live, so the main CMakeLists can wire them
+# into the llama target.
+set(OLLAMA_LLAMA_CPP_COMPAT_DIR
+    "${_compat_dir}"
+    CACHE INTERNAL "Directory holding llama-ollama-compat.{h,cpp}")
+
 # Also export the individual paths in case callers want to do something
 # custom (e.g. emit a dependency on the patch so reconfigures re-apply).
 set(OLLAMA_LLAMA_CPP_COMPAT_PATCH_FILE

llama/compat/llama-ollama-compat.cpp (vendored, 17 changed lines)

@@ -224,9 +224,24 @@ namespace {
 // Rename a tensor in BOTH the gguf_context and the ggml_context so that all
 // name-based lookups — offset map, ggml_get_tensor, tensor.name — agree.
+//
+// The gguf_context side is a bit sneaky: gguf_get_tensor_name returns a
+// pointer into the embedded ggml_tensor's `name[GGML_MAX_NAME]` buffer.
+// That buffer is non-const storage inside a std::vector element; the const
+// on the return type is just API hygiene. Casting it away and strncpy'ing
+// a new name is well-defined and avoids needing to patch gguf's internals.
 void rename_tensor(gguf_context * meta, ggml_context * ctx,
                    const char * old_name, const char * new_name) {
-    if (!gguf_rename_tensor(meta, old_name, new_name)) return;
+    const int64_t id = gguf_find_tensor(meta, old_name);
+    if (id < 0) return;
+
+    // Update the gguf-side name (what gguf_get_tensor_name returns later).
+    if (char * name_ptr = const_cast<char *>(gguf_get_tensor_name(meta, id))) {
+        std::strncpy(name_ptr, new_name, GGML_MAX_NAME - 1);
+        name_ptr[GGML_MAX_NAME - 1] = '\0';
+    }
+
+    // Update the ggml-side name (what ggml_get_tensor looks up by).
     if (ggml_tensor * t = ggml_get_tensor(ctx, old_name)) {
         ggml_set_name(t, new_name);
     }
 }

llama/compat/upstream-edits.patch

@@ -1,50 +1,3 @@
-diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h
-index 02d5f221c..2f64264f4 100644
---- a/ggml/include/gguf.h
-+++ b/ggml/include/gguf.h
-@@ -162,6 +162,9 @@ extern "C" {
-     // assumes that at least gguf_get_tensor_size bytes can be read from data
-     GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
-
-+    // rename a tensor. Returns false if old_name is not present.
-+    GGML_API bool gguf_rename_tensor(struct gguf_context * ctx, const char * old_name, const char * new_name);
-+
-     // writing gguf files can be done in 3 ways:
-     //
-     // - write the entire gguf_context to a binary file in a single pass:
-diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
-index ab3cc9748..e3d06c959 100644
---- a/ggml/src/gguf.cpp
-+++ b/ggml/src/gguf.cpp
-@@ -1280,6 +1280,16 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) {
-     ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
- }
-
-+bool gguf_rename_tensor(struct gguf_context * ctx, const char * old_name, const char * new_name) {
-+    const int64_t tensor_id = gguf_find_tensor(ctx, old_name);
-+    if (tensor_id < 0) {
-+        return false;
-+    }
-+    // ggml_set_name truncates to GGML_MAX_NAME.
-+    ggml_set_name(&ctx->info[tensor_id].t, new_name);
-+    return true;
-+}
-+
- struct gguf_writer_base {
-     size_t written_bytes {0u};
-
-diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
-index 7b1fcfca0..155a819fa 100644
---- a/src/CMakeLists.txt
-+++ b/src/CMakeLists.txt
-@@ -32,6 +32,7 @@ add_library(llama
-     llama-model-loader.cpp
-     llama-model-saver.cpp
-     llama-model.cpp
-+    llama-ollama-compat.cpp
-     llama-quant.cpp
-     llama-sampler.cpp
-     llama-vocab.cpp
 diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
 index 4e65a45a5..75836c683 100644
 --- a/src/llama-model-loader.cpp
@@ -108,14 +61,14 @@ index 4ded484dd..7d3509c23 100644
 
      if (use_mmap_buffer) {
 diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
-index f0e8786b6..1e6319ca0 100644
+index f0e8786b6..35defa89d 100644
 --- a/tools/mtmd/clip.cpp
 +++ b/tools/mtmd/clip.cpp
 @@ -10,6 +10,8 @@
  #include "ggml-backend.h"
  #include "gguf.h"
 
-+#include "src/llama-ollama-compat.h"
++#include "llama-ollama-compat.h"
 +
  #include <algorithm>
  #include <cassert>

llama/server/CMakeLists.txt

@@ -71,6 +71,22 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(llama_cpp)
 
+# Link the Ollama-compat source files into the fetched llama target.
+# Kept separate from the upstream-edits patch so our .cpp/.h stay
+# on-disk in llama/compat/ rather than being copied into _deps/.
+if(DEFINED OLLAMA_LLAMA_CPP_COMPAT_DIR)
+    target_sources(llama PRIVATE
+        ${OLLAMA_LLAMA_CPP_COMPAT_DIR}/llama-ollama-compat.cpp)
+    target_include_directories(llama PRIVATE
+        ${OLLAMA_LLAMA_CPP_COMPAT_DIR})
+    # mtmd's clip.cpp #include's the compat header too — add the same dir
+    # to its PRIVATE include path (PRIVATE on llama doesn't propagate).
+    if(TARGET mtmd)
+        target_include_directories(mtmd PRIVATE
+            ${OLLAMA_LLAMA_CPP_COMPAT_DIR})
+    endif()
+endif()
+
 # Find GPU toolkits for runtime dependency bundling.
 # The upstream llama.cpp build finds these internally, but we need the
 # variables (CUDAToolkit_LIBRARY_DIR, etc.) in our install scope.
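For completeness, a hedged sketch of how the main CMakeLists could hand the exported variables to FetchContent; the repository URL and tag below are placeholders rather than values from this commit, and only the PATCH_COMMAND and configure-dependency wiring is the point:

    include(FetchContent)

    FetchContent_Declare(llama_cpp
        GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git  # placeholder
        GIT_TAG        master                                     # placeholder pin
        PATCH_COMMAND  ${OLLAMA_LLAMA_CPP_COMPAT_PATCH_COMMAND})
    FetchContent_MakeAvailable(llama_cpp)

    # Optional, per the "emit a dependency on the patch" comment in the compat
    # CMake file: re-run configure (and the idempotent patch step) whenever the
    # patch file changes.
    set_property(DIRECTORY APPEND PROPERTY
        CMAKE_CONFIGURE_DEPENDS ${OLLAMA_LLAMA_CPP_COMPAT_PATCH_FILE})

Because the apply step is a no-op on an already-patched tree, re-running configure this way is safe; that is exactly the idempotence the compat comments call out.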