Mirror of https://github.com/ollama/ollama.git, synced 2026-05-13 14:27:00 +00:00
launch: use vram bytes for model recommendations (#15885)
This commit is contained in:
parent bad32c7244
commit b6447caebc
6 changed files with 36 additions and 20 deletions
@@ -813,7 +813,7 @@ type ModelRecommendation struct {
 	Description     string `json:"description"`
 	ContextLength   int    `json:"context_length,omitempty"`
 	MaxOutputTokens int    `json:"max_output_tokens,omitempty"`
-	VRAM            string `json:"vram,omitempty"`
+	VRAMBytes       int64  `json:"vram_bytes,omitempty"`
 }
 
 // ProcessResponse is the response from [Client.Process].
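On the wire, a recommendation now carries a raw byte count (`vram_bytes`) instead of a preformatted display string (`vram`), leaving rendering to the client. A minimal sketch of the resulting JSON shape, using a stand-in struct with only the tagged fields shown in the hunk above (the real api.ModelRecommendation has more fields, and the 14 GB value is illustrative):

package main

import (
	"encoding/json"
	"fmt"
)

// recommendation mirrors the tagged fields from the hunk above;
// it is a sketch, not the full api.ModelRecommendation type.
type recommendation struct {
	Description     string `json:"description"`
	ContextLength   int    `json:"context_length,omitempty"`
	MaxOutputTokens int    `json:"max_output_tokens,omitempty"`
	VRAMBytes       int64  `json:"vram_bytes,omitempty"`
}

func main() {
	b, _ := json.Marshal(recommendation{
		Description: "Reasoning, coding, and visual understanding locally",
		VRAMBytes:   14_000_000_000, // 14 * format.GigaByte, assuming the decimal 1e9 definition
	})
	fmt.Println(string(b))
	// {"description":"Reasoning, coding, and visual understanding locally","vram_bytes":14000000000}
}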
@@ -1659,7 +1659,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
 
 	for _, item := range items {
 		if item.Name == "qwen3.5" {
-			if !strings.Contains(item.Description, "~11GB") {
+			if !strings.Contains(item.Description, "~14GB") {
 				t.Errorf("not-installed qwen3.5 should show VRAM hint, got %q", item.Description)
 			}
 			return
@@ -1676,7 +1676,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
 
 	for _, item := range items {
 		if item.Name == "qwen3.5" {
-			if strings.Contains(item.Description, "~11GB") {
+			if strings.Contains(item.Description, "~14GB") {
 				t.Errorf("installed qwen3.5 should not show VRAM hint, got %q", item.Description)
 			}
 			return
@@ -186,7 +186,7 @@ type ModelItem struct {
 	Name            string
 	Description     string
 	Recommended     bool
-	VRAM            string
+	VRAMBytes       int64
 	ContextLength   int
 	MaxOutputTokens int
 }
@@ -783,7 +783,7 @@ func (c *launcherClient) requestRecommendations(ctx context.Context) ([]ModelIte
 				Name:            name,
 				Description:     description,
 				Recommended:     true,
-				VRAM:            strings.TrimSpace(rec.VRAM),
+				VRAMBytes:       rec.VRAMBytes,
 				ContextLength:   rec.ContextLength,
 				MaxOutputTokens: rec.MaxOutputTokens,
 			})
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"math"
 	"net/http"
 	"os"
 	"os/exec"
@@ -16,6 +17,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/cmd/internal/fileutil"
+	"github.com/ollama/ollama/format"
 	internalcloud "github.com/ollama/ollama/internal/cloud"
 	"github.com/ollama/ollama/internal/modelref"
 	"github.com/ollama/ollama/progress"
@@ -26,8 +28,19 @@ var recommendedModels = []ModelItem{
 	{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true, ContextLength: 262_144, MaxOutputTokens: 32_768},
 	{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true, ContextLength: 202_752, MaxOutputTokens: 131_072},
 	{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true, ContextLength: 204_800, MaxOutputTokens: 128_000},
-	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
-	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
+	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAMBytes: 12 * format.GigaByte},
+	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAMBytes: 14 * format.GigaByte},
 }
 
+func displayVRAM(vramBytes int64) string {
+	if vramBytes <= 0 {
+		return ""
+	}
+	gb := float64(vramBytes) / format.GigaByte
+	if gb == math.Trunc(gb) {
+		return fmt.Sprintf("~%.0fGB", gb)
+	}
+	return fmt.Sprintf("~%.1fGB", gb)
+}
+
 // cloudModelLimit holds context and output token limits for a cloud model.
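The new displayVRAM helper reconstructs the "~NGB" hint from the stored byte count at display time, printing whole gigabytes without a decimal and anything else with one. A self-contained sketch of the same logic, where gigaByte stands in for format.GigaByte (assumed here to be the decimal 1e9 from ollama's format package, not the binary 1<<30):

package main

import (
	"fmt"
	"math"
)

// gigaByte stands in for format.GigaByte; assumed decimal 1e9.
const gigaByte = 1_000_000_000

func displayVRAM(vramBytes int64) string {
	if vramBytes <= 0 {
		return "" // cloud entries carry no VRAM requirement, so no hint
	}
	gb := float64(vramBytes) / gigaByte
	if gb == math.Trunc(gb) {
		return fmt.Sprintf("~%.0fGB", gb) // whole number: "~12GB"
	}
	return fmt.Sprintf("~%.1fGB", gb) // fractional: "~12.5GB"
}

func main() {
	fmt.Println(displayVRAM(12 * gigaByte))  // ~12GB
	fmt.Println(displayVRAM(12_500_000_000)) // ~12.5GB
	fmt.Println(displayVRAM(0) == "")        // true: empty, no hint shown
}

Note that the recommended values themselves also changed in this hunk (gemma4 from "~16GB" to 12 GB, qwen3.5 from "~11GB" to 14 GB), which is why the test expectations above move from "~11GB" to "~14GB".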
@@ -403,8 +416,8 @@ func buildModelListWithRecommendations(existing []modelInfo, recommendations []ModelIte
 			if items[i].Description != "" {
 				parts = append(parts, items[i].Description)
 			}
-			if items[i].VRAM != "" {
-				parts = append(parts, items[i].VRAM)
+			if vram := displayVRAM(items[i].VRAMBytes); vram != "" {
+				parts = append(parts, vram)
 			}
 			parts = append(parts, "(not downloaded)")
 			items[i].Description = strings.Join(parts, ", ")
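Downstream, the list builder joins the description, the VRAM hint from displayVRAM, and a "(not downloaded)" marker with ", ". Reusing the stand-ins from the previous sketch (plus the standard "strings" package), a not-yet-installed qwen3.5 entry would render as:

// Fragment: assumes displayVRAM and gigaByte from the sketch above.
parts := []string{"Reasoning, coding, and visual understanding locally"}
if vram := displayVRAM(14 * gigaByte); vram != "" {
	parts = append(parts, vram)
}
parts = append(parts, "(not downloaded)")
fmt.Println(strings.Join(parts, ", "))
// Reasoning, coding, and visual understanding locally, ~14GB, (not downloaded)

This matches the updated "~14GB" expectation in TestBuildModelList_Descriptions.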
@@ -17,9 +17,12 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
-const modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+const (
+	modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+)
 
 var (
 	modelRecommendationsRefreshInterval = 4 * time.Hour
@@ -320,7 +323,6 @@ func validateModelRecommendations(recs []api.ModelRecommendation) ([]api.ModelRe
 	for _, rec := range recs {
 		rec.Model = strings.TrimSpace(rec.Model)
 		rec.Description = strings.TrimSpace(rec.Description)
-		rec.VRAM = strings.TrimSpace(rec.VRAM)
 
 		if rec.Model == "" {
 			return nil, errors.New("recommendation missing model")
@@ -391,11 +393,11 @@ var defaultModelRecommendations = []api.ModelRecommendation{
 	{
 		Model:       "gemma4",
 		Description: "Reasoning and code generation locally",
-		VRAM:        "~16GB",
+		VRAMBytes:   12 * format.GigaByte,
 	},
 	{
 		Model:       "qwen3.5",
 		Description: "Reasoning, coding, and visual understanding locally",
-		VRAM:        "~11GB",
+		VRAMBytes:   14 * format.GigaByte,
 	},
 }
@@ -19,6 +19,7 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
 func TestModelRecommendationsDefaultOrder(t *testing.T) {
@@ -41,11 +42,11 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T)
 
 	first := []api.ModelRecommendation{
 		{Model: " first-cloud:cloud ", Description: " first ", ContextLength: 2048, MaxOutputTokens: 512},
-		{Model: " first-local ", Description: " first local ", VRAM: " ~3GB "},
+		{Model: " first-local ", Description: " first local ", VRAMBytes: 3 * format.GigaByte},
 	}
 	second := []api.ModelRecommendation{
 		{Model: "second-cloud:cloud", Description: "second", ContextLength: 4096, MaxOutputTokens: 1024},
-		{Model: "second-local", Description: "second local", VRAM: "~6GB"},
+		{Model: "second-local", Description: "second local", VRAMBytes: 6 * format.GigaByte},
 	}
 
 	calls := 0
@@ -76,7 +77,7 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T)
 	}
 	if got, want := cache.Get(), []api.ModelRecommendation{
 		{Model: "first-cloud:cloud", Description: "first", ContextLength: 2048, MaxOutputTokens: 512},
-		{Model: "first-local", Description: "first local", VRAM: "~3GB"},
+		{Model: "first-local", Description: "first local", VRAMBytes: 3 * format.GigaByte},
 	}; !slices.Equal(got, want) {
 		t.Fatalf("after first refresh recommendations = %#v, want %#v", got, want)
 	}
@@ -160,7 +161,7 @@ func TestModelRecommendationsCacheRefreshErrorCasesPreserveCurrentData(t *testin
 		setupModelRecommendationsTestEnv(t, "")
 
 		cache := newModelRecommendationsCache()
-		stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAM: "~2GB"}}
+		stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAMBytes: 2 * format.GigaByte}}
 		cache.set(stable)
 		cache.client = &http.Client{Transport: tc.transport}
 
@@ -211,7 +212,7 @@ func TestModelRecommendationsSnapshotPersistAndLoad(t *testing.T) {
 
 	want := []api.ModelRecommendation{
 		{Model: "persist-cloud:cloud", Description: "persisted", ContextLength: 8192, MaxOutputTokens: 2048},
-		{Model: "persist-local", Description: "persisted local", VRAM: "~5GB"},
+		{Model: "persist-local", Description: "persisted local", VRAMBytes: 5 * format.GigaByte},
 	}
 
 	writer := newModelRecommendationsCache()
@@ -256,7 +257,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing
 	input := []api.ModelRecommendation{
 		{Model: " good-cloud:cloud ", Description: " good cloud ", ContextLength: 1024, MaxOutputTokens: 256},
 		{Model: "bad-cloud:cloud", Description: "missing limits"},
-		{Model: " good-local ", Description: " good local ", VRAM: " ~2GB "},
+		{Model: " good-local ", Description: " good local ", VRAMBytes: 2 * format.GigaByte},
 	}
 
 	got, err := validateModelRecommendations(input)
@@ -266,7 +267,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing
 
 	want := []api.ModelRecommendation{
 		{Model: "good-cloud:cloud", Description: "good cloud", ContextLength: 1024, MaxOutputTokens: 256},
-		{Model: "good-local", Description: "good local", VRAM: "~2GB"},
+		{Model: "good-local", Description: "good local", VRAMBytes: 2 * format.GigaByte},
 	}
 	if !slices.Equal(got, want) {
 		t.Fatalf("validated recommendations = %#v, want %#v", got, want)