mirror of
https://github.com/ollama/ollama.git
synced 2026-05-13 14:27:00 +00:00
* mlx: add laguna model support

* convert: support fp8 safetensors import

  Decode HF F8_E4M3 safetensors with block scale companions into
  GGUF-supported tensor types, and record which output tensors came from
  FP8 source weights. Use that source-precision metadata during create
  quantization: default FP8-sourced GGUFs to Q8_0, keep non-FP8 tensors
  at their original precision for Q8_0, and promote non-FP8 quantizable
  tensors to Q8_0 for Q4_K requests.

* ggml: add laguna model support

* server: preserve generate logprobs with builtin parsers

  Generate requests were dropping logprob-only chunks whenever a builtin
  parser buffered visible content. Chat already handled this case, but
  generate only forwarded chunks with visible response, thinking, or
  tool-call output. Keep generate chunks that carry logprobs even when
  the builtin parser has not flushed visible content yet, and add a
  regression test that exercises the behavior with a generic thinking
  parser.

* review comments - perf improvements

* ggml: implement nemotron 3 nano omni

* add poolside integration

* update poolside doc

* adapt to new cache setup

* fix test

* fix test

---------

Co-authored-by: Eva Ho <hoyyeva@gmail.com>
28 lines
1.2 KiB
Go
28 lines
1.2 KiB
Go
package models
|
|
|
|
import (
|
|
_ "github.com/ollama/ollama/model/models/bert"
|
|
_ "github.com/ollama/ollama/model/models/deepseek2"
|
|
_ "github.com/ollama/ollama/model/models/deepseekocr"
|
|
_ "github.com/ollama/ollama/model/models/gemma2"
|
|
_ "github.com/ollama/ollama/model/models/gemma3"
|
|
_ "github.com/ollama/ollama/model/models/gemma3n"
|
|
_ "github.com/ollama/ollama/model/models/gemma4"
|
|
_ "github.com/ollama/ollama/model/models/glm4moelite"
|
|
_ "github.com/ollama/ollama/model/models/glmocr"
|
|
_ "github.com/ollama/ollama/model/models/gptoss"
|
|
_ "github.com/ollama/ollama/model/models/laguna"
|
|
_ "github.com/ollama/ollama/model/models/lfm2"
|
|
_ "github.com/ollama/ollama/model/models/llama"
|
|
_ "github.com/ollama/ollama/model/models/llama4"
|
|
_ "github.com/ollama/ollama/model/models/mistral3"
|
|
_ "github.com/ollama/ollama/model/models/mllama"
|
|
_ "github.com/ollama/ollama/model/models/nemotronh"
|
|
_ "github.com/ollama/ollama/model/models/nomicbert"
|
|
_ "github.com/ollama/ollama/model/models/olmo3"
|
|
_ "github.com/ollama/ollama/model/models/qwen2"
|
|
_ "github.com/ollama/ollama/model/models/qwen25vl"
|
|
_ "github.com/ollama/ollama/model/models/qwen3"
|
|
_ "github.com/ollama/ollama/model/models/qwen3next"
|
|
_ "github.com/ollama/ollama/model/models/qwen3vl"
|
|
)
|