mirror of
https://github.com/ollama/ollama.git
synced 2026-05-13 14:27:00 +00:00
llm,server: route Ollama-format gemma3 blobs through llama/compat
Two tiny Go-side changes that let the llama/compat shim take over gemma3: 1. llm/llama_server.go: when the GGUF has embedded v.* tensors and no projector layer is declared, pass the model file itself as --mmproj. The in-process compat layer translates the same file into both a text-only view (for --model) and a clip-mmproj view (for --mmproj). 2. server/model_resolver.go: drop library/gemma3 from compatModelRedirects. The compat layer handles it directly, so no dhiltgen/ republish is needed. Other arches stay in the redirect list until they get their own handler in llama/compat/llama-ollama-compat.cpp. End-to-end verified: `ollama run gemma3` answers text and image prompts against the existing library/gemma3 blob with no re-download.
This commit is contained in:
parent
25223160d8
commit
7449b539ab
2 changed files with 16 additions and 2 deletions
|
|
@@ -423,6 +423,15 @@ func NewLlamaServerRunner(
|
|||
// Check if this is an embedding model
|
||||
_, isEmbedding := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]
|
||||
|
||||
// Older Ollama-format GGUFs store vision tensors (v.*, mm.*) inline in
|
||||
// the main model file rather than in a separate projector layer. Detect
|
||||
// this case and point --mmproj at the model itself — the in-process
|
||||
// llama.cpp compat shim translates the same file into both a text-only
|
||||
// view and a clip-mmproj view. See llama/compat/ for details.
|
||||
if len(projectors) == 0 && len(f.Tensors().Items("v.")) > 0 {
|
||||
projectors = []string{modelPath}
|
||||
}
|
||||
|
||||
gpuLibs := ml.LibraryPaths(gpus)
|
||||
status := NewStatusWriter(os.Stderr)
|
||||
|
||||
|
|
|
|||
|
|
@@ -8,10 +8,15 @@ import (
|
|||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
|
||||
// Temporary redirection logic to map incompatible library models to compatible versions
|
||||
// Temporary redirection logic to map incompatible library models to compatible versions.
|
||||
//
|
||||
// Architectures listed here are handled via republished blobs under the
|
||||
// dhiltgen/ namespace. Once llama/compat/ grows a handler for an arch, its
|
||||
// entry should be removed from this list — the compat layer translates the
|
||||
// original library/ blob in memory so no republish is needed.
|
||||
var compatModelRedirects = []struct{ from, to string }{
|
||||
{"library/gpt-oss", "dhiltgen/gpt-oss"},
|
||||
{"library/gemma3", "dhiltgen/gemma3"},
|
||||
// library/gemma3 — handled by llama/compat (text + vision).
|
||||
{"library/embeddinggemma", "dhiltgen/embeddinggemma"},
|
||||
{"library/snowflake-arctic-embed2", "dhiltgen/snowflake-arctic-embed2"},
|
||||
{"library/gemma3n", "dhiltgen/gemma3n"},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue