llama/compat: collapse text-loader hook back to one new patch line
The previous glm-ocr addition put 4 new lines into upstream-edits.patch
(1 in the constructor for set_loader_path + 3 in load_all_data for the
maybe_load_text_tensor if/continue/closing-brace block). Refactor that
down to +1 line:
* Fold path capture into translate_metadata: the signature now takes
  `const char * fname` and stashes it in the per-loader registry
  internally. The constructor still makes just one llama_ollama_compat
  call (only the argument list changed).
* Make maybe_load_text_tensor self-contained: it derives the buffer
type from `cur->buffer` rather than the caller passing it. The
hook line in load_all_data collapses to a single
`if (llama_ollama_compat::maybe_load_text_tensor(this, cur, weight->offs)) continue;`.
* Drop the public `set_loader_path` symbol — it's now an internal
detail of translate_metadata.
Net patch growth from glm-ocr support: +1 line, leaving the upstream
patch close to its original 17-line surface.
Functional: glm-ocr text generation still works ("Paris" via raw
turn-template completion), concat ops still fire (28MB per block).
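
For readers outside the tree, here is a minimal sketch of the pattern the
message describes: a mutex-guarded map keyed by loader pointer, populated
once at metadata-translation time and consumed by a self-contained load
hook. The `loader` stub and the trimmed signatures are hypothetical
stand-ins (the real functions take gguf/ggml arguments); only the shape
of the pattern matches the actual code.

    #include <cstdio>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    struct loader {};  // stand-in for llama_model_loader

    namespace {
    std::mutex g_loader_path_mutex;
    std::unordered_map<const loader *, std::string> g_loader_paths;
    }

    // Constructor-time hook: does its normal work *and* stashes the file
    // path, so the call site stays one line even as the hook grows.
    void translate_metadata(const loader * ml, const char * fname) {
        std::lock_guard<std::mutex> lk(g_loader_path_mutex);
        g_loader_paths[ml] = fname ? fname : "";
    }

    // Load-time hook: self-contained — recovers the path from the
    // registry instead of requiring the caller to thread it through.
    bool maybe_load_text_tensor(const loader * ml) {
        std::string path;
        {
            std::lock_guard<std::mutex> lk(g_loader_path_mutex);
            auto it = g_loader_paths.find(ml);
            if (it == g_loader_paths.end() || it->second.empty()) return false;
            path = it->second;
        }
        std::printf("would read tensor bytes from %s\n", path.c_str());
        return true;
    }

    int main() {
        loader ml;
        translate_metadata(&ml, "model.gguf");
        if (maybe_load_text_tensor(&ml)) { /* caller would `continue` */ }
    }

The payoff of keying on the loader pointer is that concurrent loaders
never collide, and the load hook needs no extra parameters at its call
site.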
This commit is contained in:
  parent f1bd1a25ac
  commit 4b5cf3420a

3 changed files with 29 additions and 29 deletions
llama/compat/llama-ollama-compat.cpp (vendored): 29 changes

@@ -17,6 +17,12 @@ using namespace llama_ollama_compat::detail; // pull detail:: helpers into scope
 namespace {
 
+// Per-loader file path registry — set by translate_metadata, read by
+// maybe_load_text_tensor so it can pass the path to load ops without a
+// separate patch insertion in the model loader's load_all_data path.
+std::mutex g_loader_path_mutex;
+std::unordered_map<const llama_model_loader *, std::string> g_loader_paths;
+
 // =========================================================================
 // gemma3 (text side)
 // =========================================================================

@@ -1297,8 +1303,13 @@ void handle_mistral3_clip(gguf_context * meta, ggml_context * ctx) {
 void translate_metadata(const llama_model_loader * ml,
                         gguf_context * meta,
                         ggml_context * ctx,
-                        std::string & arch_name) {
+                        std::string & arch_name,
+                        const char * fname) {
     if (!meta) return;
+    {
+        std::lock_guard<std::mutex> lk(g_loader_path_mutex);
+        g_loader_paths[ml] = fname ? fname : "";
+    }
     if (arch_name == "gemma3")    handle_gemma3   (ml, meta, ctx);
     if (arch_name == "gemma4")    handle_gemma4   (ml, meta, ctx);
     if (arch_name == "qwen35moe") handle_qwen35moe(ml, meta, ctx);

@@ -1375,20 +1386,9 @@ bool maybe_load_tensor(ggml_tensor * cur,
     return true;
 }
 
-namespace {
-std::mutex g_loader_path_mutex;
-std::unordered_map<const llama_model_loader *, std::string> g_loader_paths;
-}
-
-void set_loader_path(const llama_model_loader * ml, const char * fname) {
-    std::lock_guard<std::mutex> lk(g_loader_path_mutex);
-    g_loader_paths[ml] = fname ? fname : "";
-}
-
 bool maybe_load_text_tensor(const llama_model_loader * ml,
                             ggml_tensor * cur,
-                            size_t file_offset,
-                            ggml_backend_buffer_type_t buft) {
+                            size_t file_offset) {
     std::string path;
     {
         std::lock_guard<std::mutex> lk(g_loader_path_mutex);

@@ -1396,6 +1396,9 @@ bool maybe_load_text_tensor(const llama_model_loader * ml,
         if (it == g_loader_paths.end() || it->second.empty()) return false;
        path = it->second;
     }
+    ggml_backend_buffer_type_t buft = cur->buffer
+        ? ggml_backend_buffer_get_type(cur->buffer)
+        : nullptr;
     return maybe_load_tensor(cur, path.c_str(), file_offset, buft);
 }
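The last hunk above is what lets the call site shrink: the buffer type is
derived inside the hook instead of being passed in. As a standalone idiom
(ggml_backend_buffer_get_type is the real ggml accessor; the helper name
here is ours):

    #include "ggml-backend.h"

    // Derive a tensor's backend buffer type, falling back to nullptr when
    // the tensor has no buffer yet — mirrors the cur->buffer check above.
    static ggml_backend_buffer_type_t tensor_buft(const ggml_tensor * cur) {
        return cur->buffer ? ggml_backend_buffer_get_type(cur->buffer) : nullptr;
    }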
llama/compat/llama-ollama-compat.h (vendored): 18 changes

@@ -35,10 +35,13 @@ struct llama_model_loader;
 namespace llama_ollama_compat {
 
 // Called from llama_model_loader's constructor, right after the arch is read.
+// `fname` is the model file path, captured here so later load-time hooks
+// (maybe_load_text_tensor) can read raw bytes from it.
 void translate_metadata(const llama_model_loader * ml,
                         gguf_context * meta,
                         ggml_context * ctx,
-                        std::string & arch_name);
+                        std::string & arch_name,
+                        const char * fname);
 
 // Called from llama_model_loader's weights_map population loop. Returns
 // true to drop a tensor from the loader — used to hide embedded vision

@@ -60,15 +63,12 @@ bool maybe_load_tensor(ggml_tensor * cur,
                        size_t file_offset,
                        ggml_backend_buffer_type_t buft);
 
-// Same as maybe_load_tensor but for the text-side llama_model_loader,
-// which doesn't have the clip loader's `fname` in scope at the read
-// site. Looks up the model's file path from a per-loader registry
-// populated by `set_loader_path` (called from the model loader's
-// constructor right after `fname` is in scope).
+// Text-side counterpart to maybe_load_tensor. Self-contained: looks up
+// the model file path from the per-loader registry populated by
+// translate_metadata, and derives the buffer type from cur->buffer
+// internally — keeps the call site (and the upstream patch) to one line.
 bool maybe_load_text_tensor(const llama_model_loader * ml,
                             ggml_tensor * cur,
-                            size_t file_offset,
-                            ggml_backend_buffer_type_t buft);
-void set_loader_path(const llama_model_loader * ml, const char * fname);
+                            size_t file_offset);
 
 } // namespace llama_ollama_compat
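Assembled from the patch hunks below into plain code (indentation
approximated), the complete upstream surface this commit leaves behind is
two single lines, each shown with its neighboring upstream line:

    // llama_model_loader constructor, right after the arch is read:
    get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
    llama_ollama_compat::translate_metadata(this, metadata, ctx, arch_name, fname.c_str());

    // load_all_data, before the mmap/read branch:
    size_t n_size = ggml_nbytes(cur);
    if (llama_ollama_compat::maybe_load_text_tensor(this, cur, weight->offs)) continue;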
upstream-edits.patch: 11 changes

@@ -10,12 +10,11 @@ index 4e65a45a5..75836c683 100644
 
  #include <algorithm>
  #include <array>
-@@ -549,6 +550,8 @@ llama_model_loader::llama_model_loader(
+@@ -549,6 +550,7 @@ llama_model_loader::llama_model_loader(
      }
 
      get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
-+    llama_ollama_compat::set_loader_path(this, fname.c_str());
-+    llama_ollama_compat::translate_metadata(this, metadata, ctx, arch_name);
++    llama_ollama_compat::translate_metadata(this, metadata, ctx, arch_name, fname.c_str());
      llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
      files.emplace_back(new llama_file(fname.c_str(), "rb", use_direct_io));

@@ -39,12 +38,10 @@ index 4e65a45a5..75836c683 100644
      // make sure there is no duplicated tensor names
      if (weights_map.find(tensor_name) != weights_map.end()) {
          throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur)));
-@@ -1535,3 +1543,6 @@ bool llama_model_loader::load_all_data(
+@@ -1535,3 +1542,4 @@ bool llama_model_loader::load_all_data(
      size_t n_size = ggml_nbytes(cur);
 
-+    if (llama_ollama_compat::maybe_load_text_tensor(this, cur, weight->offs, cur->buffer ? ggml_backend_buffer_get_type(cur->buffer) : nullptr)) {
-+        continue;
-+    }
++    if (llama_ollama_compat::maybe_load_text_tensor(this, cur, weight->offs)) continue;
      if (use_mmap) {
  diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
  index f0e8786b6..35defa89d 100644