From b6447caebcd7053186cc4c831f13c9e6feec28ee Mon Sep 17 00:00:00 2001
From: Parth Sareen
Date: Wed, 29 Apr 2026 18:40:14 -0700
Subject: [PATCH] launch: use vram bytes for model recommendations (#15885)

---
 api/types.go                         |  2 +-
 cmd/launch/integrations_test.go      |  4 ++--
 cmd/launch/launch.go                 |  4 ++--
 cmd/launch/models.go                 | 21 +++++++++++++++++----
 server/model_recommendations.go      | 10 ++++++----
 server/model_recommendations_test.go | 15 ++++++++-------
 6 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/api/types.go b/api/types.go
index 72dbf6ca6..7fc53b370 100644
--- a/api/types.go
+++ b/api/types.go
@@ -813,7 +813,7 @@ type ModelRecommendation struct {
 	Description     string `json:"description"`
 	ContextLength   int    `json:"context_length,omitempty"`
 	MaxOutputTokens int    `json:"max_output_tokens,omitempty"`
-	VRAM            string `json:"vram,omitempty"`
+	VRAMBytes       int64  `json:"vram_bytes,omitempty"`
 }
 
 // ProcessResponse is the response from [Client.Process].
diff --git a/cmd/launch/integrations_test.go b/cmd/launch/integrations_test.go
index 123773fca..60c8ba9fb 100644
--- a/cmd/launch/integrations_test.go
+++ b/cmd/launch/integrations_test.go
@@ -1659,7 +1659,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
 
 		for _, item := range items {
 			if item.Name == "qwen3.5" {
-				if !strings.Contains(item.Description, "~11GB") {
+				if !strings.Contains(item.Description, "~14GB") {
 					t.Errorf("not-installed qwen3.5 should show VRAM hint, got %q", item.Description)
 				}
 				return
@@ -1676,7 +1676,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
 
 		for _, item := range items {
 			if item.Name == "qwen3.5" {
-				if strings.Contains(item.Description, "~11GB") {
+				if strings.Contains(item.Description, "~14GB") {
 					t.Errorf("installed qwen3.5 should not show VRAM hint, got %q", item.Description)
 				}
 				return
diff --git a/cmd/launch/launch.go b/cmd/launch/launch.go
index fefaf99aa..524b714df 100644
--- a/cmd/launch/launch.go
+++ b/cmd/launch/launch.go
@@ -186,7 +186,7 @@ type ModelItem struct {
 	Name            string
 	Description     string
 	Recommended     bool
-	VRAM            string
+	VRAMBytes       int64
 	ContextLength   int
 	MaxOutputTokens int
 }
@@ -783,7 +783,7 @@ func (c *launcherClient) requestRecommendations(ctx context.Context) ([]ModelIte
 			Name:            name,
 			Description:     description,
 			Recommended:     true,
-			VRAM:            strings.TrimSpace(rec.VRAM),
+			VRAMBytes:       rec.VRAMBytes,
 			ContextLength:   rec.ContextLength,
 			MaxOutputTokens: rec.MaxOutputTokens,
 		})
diff --git a/cmd/launch/models.go b/cmd/launch/models.go
index 00819efe2..f985950d7 100644
--- a/cmd/launch/models.go
+++ b/cmd/launch/models.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"math"
 	"net/http"
 	"os"
 	"os/exec"
@@ -16,6 +17,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/cmd/internal/fileutil"
+	"github.com/ollama/ollama/format"
 	internalcloud "github.com/ollama/ollama/internal/cloud"
 	"github.com/ollama/ollama/internal/modelref"
 	"github.com/ollama/ollama/progress"
@@ -26,8 +28,19 @@ var recommendedModels = []ModelItem{
 	{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true, ContextLength: 262_144, MaxOutputTokens: 32_768},
 	{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true, ContextLength: 202_752, MaxOutputTokens: 131_072},
 	{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true, ContextLength: 204_800, MaxOutputTokens: 128_000},
-	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
-	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
+	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAMBytes: 12 * format.GigaByte},
+	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAMBytes: 14 * format.GigaByte},
+}
+
+func displayVRAM(vramBytes int64) string {
+	if vramBytes <= 0 {
+		return ""
+	}
+	gb := float64(vramBytes) / format.GigaByte
+	if gb == math.Trunc(gb) {
+		return fmt.Sprintf("~%.0fGB", gb)
+	}
+	return fmt.Sprintf("~%.1fGB", gb)
 }
 
 // cloudModelLimit holds context and output token limits for a cloud model.
@@ -403,8 +416,8 @@ func buildModelListWithRecommendations(existing []modelInfo, recommendations []M
 		if items[i].Description != "" {
 			parts = append(parts, items[i].Description)
 		}
-		if items[i].VRAM != "" {
-			parts = append(parts, items[i].VRAM)
+		if vram := displayVRAM(items[i].VRAMBytes); vram != "" {
+			parts = append(parts, vram)
 		}
 		parts = append(parts, "(not downloaded)")
 		items[i].Description = strings.Join(parts, ", ")
diff --git a/server/model_recommendations.go b/server/model_recommendations.go
index 04aee795d..b800ad4c1 100644
--- a/server/model_recommendations.go
+++ b/server/model_recommendations.go
@@ -17,9 +17,12 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
-const modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+const (
+	modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+)
 
 var (
 	modelRecommendationsRefreshInterval = 4 * time.Hour
@@ -320,7 +323,6 @@ func validateModelRecommendations(recs []api.ModelRecommendation) ([]api.ModelRe
 	for _, rec := range recs {
 		rec.Model = strings.TrimSpace(rec.Model)
 		rec.Description = strings.TrimSpace(rec.Description)
-		rec.VRAM = strings.TrimSpace(rec.VRAM)
 		if rec.Model == "" {
 			return nil, errors.New("recommendation missing model")
 		}
@@ -391,11 +393,11 @@ var defaultModelRecommendations = []api.ModelRecommendation{
 	{
 		Model:       "gemma4",
 		Description: "Reasoning and code generation locally",
-		VRAM:        "~16GB",
+		VRAMBytes:   12 * format.GigaByte,
 	},
 	{
 		Model:       "qwen3.5",
 		Description: "Reasoning, coding, and visual understanding locally",
-		VRAM:        "~11GB",
+		VRAMBytes:   14 * format.GigaByte,
 	},
 }
diff --git a/server/model_recommendations_test.go b/server/model_recommendations_test.go
index da9e716eb..346462fc4 100644
--- a/server/model_recommendations_test.go
+++ b/server/model_recommendations_test.go
@@ -19,6 +19,7 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
 func TestModelRecommendationsDefaultOrder(t *testing.T) {
@@ -41,11 +42,11 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T)
 
 	first := []api.ModelRecommendation{
 		{Model: " first-cloud:cloud ", Description: " first ", ContextLength: 2048, MaxOutputTokens: 512},
-		{Model: " first-local ", Description: " first local ", VRAM: " ~3GB "},
+		{Model: " first-local ", Description: " first local ", VRAMBytes: 3 * format.GigaByte},
 	}
 	second := []api.ModelRecommendation{
 		{Model: "second-cloud:cloud", Description: "second", ContextLength: 4096, MaxOutputTokens: 1024},
-		{Model: "second-local", Description: "second local", VRAM: "~6GB"},
+		{Model: "second-local", Description: "second local", VRAMBytes: 6 * format.GigaByte},
 	}
 
 	calls := 0
@@ -76,7 +77,7 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T)
 	}
 	if got, want := cache.Get(), []api.ModelRecommendation{
 		{Model: "first-cloud:cloud", Description: "first", ContextLength: 2048, MaxOutputTokens: 512},
-		{Model: "first-local", Description: "first local", VRAM: "~3GB"},
+		{Model: "first-local", Description: "first local", VRAMBytes: 3 * format.GigaByte},
 	}; !slices.Equal(got, want) {
 		t.Fatalf("after first refresh recommendations = %#v, want %#v", got, want)
 	}
@@ -160,7 +161,7 @@ func TestModelRecommendationsCacheRefreshErrorCasesPreserveCurrentData(t *testin
 			setupModelRecommendationsTestEnv(t, "")
 			cache := newModelRecommendationsCache()
 
-			stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAM: "~2GB"}}
+			stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAMBytes: 2 * format.GigaByte}}
 			cache.set(stable)
 
 			cache.client = &http.Client{Transport: tc.transport}
@@ -211,7 +212,7 @@ func TestModelRecommendationsSnapshotPersistAndLoad(t *testing.T) {
 
 	want := []api.ModelRecommendation{
 		{Model: "persist-cloud:cloud", Description: "persisted", ContextLength: 8192, MaxOutputTokens: 2048},
-		{Model: "persist-local", Description: "persisted local", VRAM: "~5GB"},
+		{Model: "persist-local", Description: "persisted local", VRAMBytes: 5 * format.GigaByte},
 	}
 
 	writer := newModelRecommendationsCache()
@@ -256,7 +257,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing
 	input := []api.ModelRecommendation{
 		{Model: " good-cloud:cloud ", Description: " good cloud ", ContextLength: 1024, MaxOutputTokens: 256},
 		{Model: "bad-cloud:cloud", Description: "missing limits"},
-		{Model: " good-local ", Description: " good local ", VRAM: " ~2GB "},
+		{Model: " good-local ", Description: " good local ", VRAMBytes: 2 * format.GigaByte},
 	}
 
 	got, err := validateModelRecommendations(input)
@@ -266,7 +267,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing
 
 	want := []api.ModelRecommendation{
 		{Model: "good-cloud:cloud", Description: "good cloud", ContextLength: 1024, MaxOutputTokens: 256},
-		{Model: "good-local", Description: "good local", VRAM: "~2GB"},
+		{Model: "good-local", Description: "good local", VRAMBytes: 2 * format.GigaByte},
 	}
 	if !slices.Equal(got, want) {
 		t.Fatalf("validated recommendations = %#v, want %#v", got, want)
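
For context, a minimal self-contained sketch (not part of the patch) of how the new vram_bytes values end up rendered as the launcher's VRAM hints. It mirrors the displayVRAM helper added in cmd/launch/models.go; the local gigaByte constant stands in for format.GigaByte, assumed here to be the decimal 1000^3 constant from the format package.

package main

import (
	"fmt"
	"math"
)

// gigaByte stands in for format.GigaByte (assumed decimal, 1000^3 bytes)
// so this sketch compiles on its own.
const gigaByte = 1000 * 1000 * 1000

// displayVRAM mirrors the helper from the patch: non-positive byte counts
// produce no hint, whole-gigabyte values drop the decimal, and anything
// else is shown with one decimal place.
func displayVRAM(vramBytes int64) string {
	if vramBytes <= 0 {
		return ""
	}
	gb := float64(vramBytes) / gigaByte
	if gb == math.Trunc(gb) {
		return fmt.Sprintf("~%.0fGB", gb)
	}
	return fmt.Sprintf("~%.1fGB", gb)
}

func main() {
	fmt.Printf("%q\n", displayVRAM(12*gigaByte)) // "~12GB" (gemma4 default)
	fmt.Printf("%q\n", displayVRAM(14*gigaByte)) // "~14GB" (qwen3.5 default)
	fmt.Printf("%q\n", displayVRAM(0))           // "" (no VRAM hint appended)
}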