mirror of
https://github.com/ollama/ollama.git
synced 2026-05-13 14:27:00 +00:00
This change adds support for MTP (multi-token prediction) speculative decoding for the gemma4 model family. It includes:

* support for importing safetensors based gemma4 draft models with `ollama create`
* a new DRAFT command in the Modelfile for specifying draft models
* a --quantize-draft flag for the ollama create command to quantize the draft model
* cache support for speculation
* changes to the rotating cache to be able to handle MTP correctly
* sampling support for draft model token prediction

---------

Co-authored-by: Daniel Hiltgen <daniel@ollama.com>
42 lines
1.5 KiB
Go
42 lines
1.5 KiB
Go
package model
|
|
|
|
// ConfigV2 represents the configuration metadata for a model.
|
|
type ConfigV2 struct {
|
|
ModelFormat string `json:"model_format"`
|
|
ModelFamily string `json:"model_family"`
|
|
ModelFamilies []string `json:"model_families"`
|
|
ModelType string `json:"model_type"` // shown as Parameter Size
|
|
FileType string `json:"file_type"` // shown as Quantization Level
|
|
Renderer string `json:"renderer,omitempty"`
|
|
Parser string `json:"parser,omitempty"`
|
|
Requires string `json:"requires,omitempty"`
|
|
|
|
RemoteHost string `json:"remote_host,omitempty"`
|
|
RemoteModel string `json:"remote_model,omitempty"`
|
|
|
|
// used for remotes
|
|
Capabilities []string `json:"capabilities,omitempty"`
|
|
ContextLen int `json:"context_length,omitempty"`
|
|
EmbedLen int `json:"embedding_length,omitempty"`
|
|
BaseName string `json:"base_name,omitempty"`
|
|
Draft *Draft `json:"draft,omitempty"`
|
|
|
|
// required by spec
|
|
Architecture string `json:"architecture"`
|
|
OS string `json:"os"`
|
|
RootFS RootFS `json:"rootfs"`
|
|
}
|
|
|
|
// Draft describes an auxiliary draft model stored in the same manifest.
//
// Every field is a string tagged omitempty, so a zero-value Draft encodes to
// an empty JSON object.
// NOTE(review): the meaning of TensorPrefix and the format of Config are not
// visible here — confirm against the code that reads them.
type Draft struct {
	ModelFormat  string `json:"model_format,omitempty"`
	Architecture string `json:"architecture,omitempty"`
	TensorPrefix string `json:"tensor_prefix,omitempty"`
	Config       string `json:"config,omitempty"`
}
|
|
|
|
// RootFS represents the root filesystem configuration for a model.
//
// Neither field is tagged omitempty, so both keys are always present in the
// encoded JSON; a nil DiffIDs slice encodes as null.
type RootFS struct {
	Type    string   `json:"type"`
	DiffIDs []string `json:"diff_ids"`
}
|