From b6447caebcd7053186cc4c831f13c9e6feec28ee Mon Sep 17 00:00:00 2001
From: Parth Sareen
Date: Wed, 29 Apr 2026 18:40:14 -0700
Subject: [PATCH] launch: use vram bytes for model recommendations (#15885)

---
 api/types.go                         |  2 +-
 cmd/launch/integrations_test.go      |  4 ++--
 cmd/launch/launch.go                 |  4 ++--
 cmd/launch/models.go                 | 21 +++++++++++++++++----
 server/model_recommendations.go      | 10 ++++++----
 server/model_recommendations_test.go | 15 ++++++++-------
 6 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/api/types.go b/api/types.go
index 72dbf6ca6..7fc53b370 100644
--- a/api/types.go
+++ b/api/types.go
@@ -813,7 +813,7 @@ type ModelRecommendation struct {
 	Description     string `json:"description"`
 	ContextLength   int    `json:"context_length,omitempty"`
 	MaxOutputTokens int    `json:"max_output_tokens,omitempty"`
-	VRAM            string `json:"vram,omitempty"`
+	VRAMBytes       int64  `json:"vram_bytes,omitempty"`
 }
 
 // ProcessResponse is the response from [Client.Process].
diff --git a/cmd/launch/integrations_test.go b/cmd/launch/integrations_test.go
index 123773fca..60c8ba9fb 100644
--- a/cmd/launch/integrations_test.go
+++ b/cmd/launch/integrations_test.go
@@ -1659,7 +1659,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
 
 		for _, item := range items {
 			if item.Name == "qwen3.5" {
-				if !strings.Contains(item.Description, "~11GB") {
+				if !strings.Contains(item.Description, "~14GB") {
 					t.Errorf("not-installed qwen3.5 should show VRAM hint, got %q", item.Description)
 				}
 				return
@@ -1676,7 +1676,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
 
 		for _, item := range items {
 			if item.Name == "qwen3.5" {
-				if strings.Contains(item.Description, "~11GB") {
+				if strings.Contains(item.Description, "~14GB") {
 					t.Errorf("installed qwen3.5 should not show VRAM hint, got %q", item.Description)
 				}
 				return
diff --git a/cmd/launch/launch.go b/cmd/launch/launch.go
index fefaf99aa..524b714df 100644
--- a/cmd/launch/launch.go
+++ b/cmd/launch/launch.go
@@ -186,7 +186,7 @@ type ModelItem struct {
 	Name            string
 	Description     string
 	Recommended     bool
-	VRAM            string
+	VRAMBytes       int64
 	ContextLength   int
 	MaxOutputTokens int
 }
@@ -783,7 +783,7 @@ func (c *launcherClient) requestRecommendations(ctx context.Context) ([]ModelIte
 			Name:            name,
 			Description:     description,
 			Recommended:     true,
-			VRAM:            strings.TrimSpace(rec.VRAM),
+			VRAMBytes:       rec.VRAMBytes,
 			ContextLength:   rec.ContextLength,
 			MaxOutputTokens: rec.MaxOutputTokens,
 		})
diff --git a/cmd/launch/models.go b/cmd/launch/models.go
index 00819efe2..f985950d7 100644
--- a/cmd/launch/models.go
+++ b/cmd/launch/models.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"math"
 	"net/http"
 	"os"
 	"os/exec"
@@ -16,6 +17,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/cmd/internal/fileutil"
+	"github.com/ollama/ollama/format"
 	internalcloud "github.com/ollama/ollama/internal/cloud"
 	"github.com/ollama/ollama/internal/modelref"
 	"github.com/ollama/ollama/progress"
@@ -26,8 +28,19 @@ var recommendedModels = []ModelItem{
 	{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true, ContextLength: 262_144, MaxOutputTokens: 32_768},
 	{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true, ContextLength: 202_752, MaxOutputTokens: 131_072},
 	{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true, ContextLength: 204_800, MaxOutputTokens: 128_000},
-	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
-	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
+	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAMBytes: 12 * format.GigaByte},
+	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAMBytes: 14 * format.GigaByte},
+}
+
+func displayVRAM(vramBytes int64) string {
+	if vramBytes <= 0 {
+		return ""
+	}
+	gb := float64(vramBytes) / format.GigaByte
+	if gb == math.Trunc(gb) {
+		return fmt.Sprintf("~%.0fGB", gb)
+	}
+	return fmt.Sprintf("~%.1fGB", gb)
 }
 
 // cloudModelLimit holds context and output token limits for a cloud model.
@@ -403,8 +416,8 @@ func buildModelListWithRecommendations(existing []modelInfo, recommendations []M
 		if items[i].Description != "" {
 			parts = append(parts, items[i].Description)
 		}
-		if items[i].VRAM != "" {
-			parts = append(parts, items[i].VRAM)
+		if vram := displayVRAM(items[i].VRAMBytes); vram != "" {
+			parts = append(parts, vram)
 		}
 		parts = append(parts, "(not downloaded)")
 		items[i].Description = strings.Join(parts, ", ")
diff --git a/server/model_recommendations.go b/server/model_recommendations.go
index 04aee795d..b800ad4c1 100644
--- a/server/model_recommendations.go
+++ b/server/model_recommendations.go
@@ -17,9 +17,12 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
-const modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+const (
+	modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+)
 
 var (
 	modelRecommendationsRefreshInterval = 4 * time.Hour
@@ -320,7 +323,6 @@ func validateModelRecommendations(recs []api.ModelRecommendation) ([]api.ModelRe
 	for _, rec := range recs {
 		rec.Model = strings.TrimSpace(rec.Model)
 		rec.Description = strings.TrimSpace(rec.Description)
-		rec.VRAM = strings.TrimSpace(rec.VRAM)
 		if rec.Model == "" {
 			return nil, errors.New("recommendation missing model")
 		}
@@ -391,11 +393,11 @@ var defaultModelRecommendations = []api.ModelRecommendation{
 	{
 		Model:       "gemma4",
 		Description: "Reasoning and code generation locally",
-		VRAM:        "~16GB",
+		VRAMBytes:   12 * format.GigaByte,
 	},
 	{
 		Model:       "qwen3.5",
 		Description: "Reasoning, coding, and visual understanding locally",
-		VRAM:        "~11GB",
+		VRAMBytes:   14 * format.GigaByte,
 	},
 }
diff --git a/server/model_recommendations_test.go b/server/model_recommendations_test.go
index da9e716eb..346462fc4 100644
--- a/server/model_recommendations_test.go
+++ b/server/model_recommendations_test.go
@@ -19,6 +19,7 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
 func TestModelRecommendationsDefaultOrder(t *testing.T) {
@@ -41,11 +42,11 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T)
 
 	first := []api.ModelRecommendation{
 		{Model: " first-cloud:cloud ", Description: " first ", ContextLength: 2048, MaxOutputTokens: 512},
-		{Model: " first-local ", Description: " first local ", VRAM: " ~3GB "},
+		{Model: " first-local ", Description: " first local ", VRAMBytes: 3 * format.GigaByte},
 	}
 	second := []api.ModelRecommendation{
 		{Model: "second-cloud:cloud", Description: "second", ContextLength: 4096, MaxOutputTokens: 1024},
-		{Model: "second-local", Description: "second local", VRAM: "~6GB"},
+		{Model: "second-local", Description: "second local", VRAMBytes: 6 * format.GigaByte},
 	}
 
 	calls := 0
@@ -76,7 +77,7 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T)
 	}
 	if got, want := cache.Get(), []api.ModelRecommendation{
 		{Model: "first-cloud:cloud", Description: "first", ContextLength: 2048, MaxOutputTokens: 512},
-		{Model: "first-local", Description: "first local", VRAM: "~3GB"},
+		{Model: "first-local", Description: "first local", VRAMBytes: 3 * format.GigaByte},
 	}; !slices.Equal(got, want) {
 		t.Fatalf("after first refresh recommendations = %#v, want %#v", got, want)
 	}
@@ -160,7 +161,7 @@ func TestModelRecommendationsCacheRefreshErrorCasesPreserveCurrentData(t *testin
 			setupModelRecommendationsTestEnv(t, "")
 			cache := newModelRecommendationsCache()
 
-			stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAM: "~2GB"}}
+			stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAMBytes: 2 * format.GigaByte}}
 			cache.set(stable)
 
 			cache.client = &http.Client{Transport: tc.transport}
@@ -211,7 +212,7 @@ func TestModelRecommendationsSnapshotPersistAndLoad(t *testing.T) {
 
 	want := []api.ModelRecommendation{
 		{Model: "persist-cloud:cloud", Description: "persisted", ContextLength: 8192, MaxOutputTokens: 2048},
-		{Model: "persist-local", Description: "persisted local", VRAM: "~5GB"},
+		{Model: "persist-local", Description: "persisted local", VRAMBytes: 5 * format.GigaByte},
 	}
 
 	writer := newModelRecommendationsCache()
@@ -256,7 +257,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing
 	input := []api.ModelRecommendation{
 		{Model: " good-cloud:cloud ", Description: " good cloud ", ContextLength: 1024, MaxOutputTokens: 256},
 		{Model: "bad-cloud:cloud", Description: "missing limits"},
-		{Model: " good-local ", Description: " good local ", VRAM: " ~2GB "},
+		{Model: " good-local ", Description: " good local ", VRAMBytes: 2 * format.GigaByte},
 	}
 
 	got, err := validateModelRecommendations(input)
@@ -266,7 +267,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing
 
 	want := []api.ModelRecommendation{
 		{Model: "good-cloud:cloud", Description: "good cloud", ContextLength: 1024, MaxOutputTokens: 256},
-		{Model: "good-local", Description: "good local", VRAM: "~2GB"},
+		{Model: "good-local", Description: "good local", VRAMBytes: 2 * format.GigaByte},
 	}
 	if !slices.Equal(got, want) {
 		t.Fatalf("validated recommendations = %#v, want %#v", got, want)
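
For context, a minimal self-contained sketch (not part of the patch) of how the new vram_bytes values end up rendered as the launcher's VRAM hints. It mirrors the displayVRAM helper added in cmd/launch/models.go; the local gigaByte constant stands in for format.GigaByte, assumed here to be the decimal 1000^3 constant from the format package.

package main

import (
	"fmt"
	"math"
)

// gigaByte stands in for format.GigaByte (assumed decimal, 1000^3 bytes)
// so this sketch compiles on its own.
const gigaByte = 1000 * 1000 * 1000

// displayVRAM mirrors the helper from the patch: non-positive byte counts
// produce no hint, whole-gigabyte values drop the decimal, and anything
// else is shown with one decimal place.
func displayVRAM(vramBytes int64) string {
	if vramBytes <= 0 {
		return ""
	}
	gb := float64(vramBytes) / gigaByte
	if gb == math.Trunc(gb) {
		return fmt.Sprintf("~%.0fGB", gb)
	}
	return fmt.Sprintf("~%.1fGB", gb)
}

func main() {
	fmt.Printf("%q\n", displayVRAM(12*gigaByte)) // "~12GB" (gemma4 default)
	fmt.Printf("%q\n", displayVRAM(14*gigaByte)) // "~14GB" (qwen3.5 default)
	fmt.Printf("%q\n", displayVRAM(0))           // "" (no VRAM hint appended)
}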