Update the default Sourcegraph-supplied LLM models (#64281)

This PR sets the defaults for "Sourcegraph-supplied LLM models".

## When will these "defaults" be used?

These models will _only_ be used if the Sourcegraph instance is
_explicitly_ using the newer "modelConfiguration" site configuration
data _and_ opts into using Sourcegraph-supplied LLM models.

If the Sourcegraph instance is using the older "completions"
configuration blob, then _only_ the user-supplied models will be used
(or the specific defaults defined in the code for that completions
provider).
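
To make the precedence concrete, here is a minimal sketch in Go. The
type and field names here are hypothetical stand-ins, not the actual
site-config schema; it only illustrates the "newer config wins, legacy
blob otherwise" rule described above:

```go
package main

import "fmt"

// Hypothetical, simplified shapes; the real site-config schema in the
// Sourcegraph repo is richer than this sketch.
type modelConfiguration struct {
	// Non-nil means the instance opted into Sourcegraph-supplied models.
	Sourcegraph *struct{}
}

type siteConfig struct {
	ModelConfiguration *modelConfiguration // newer-style config
	Completions        *struct{}           // older-style "completions" blob
}

// modelSource mirrors the precedence described above.
func modelSource(c siteConfig) string {
	if c.ModelConfiguration != nil && c.ModelConfiguration.Sourcegraph != nil {
		// Newer config + opt-in: the embedded models.json defaults apply.
		return "sourcegraph-supplied defaults (models.json)"
	}
	if c.Completions != nil {
		// Legacy blob: only user-supplied models, or the defaults
		// hard-coded for that completions provider.
		return "user-supplied / legacy completions defaults"
	}
	return "no Cody model configuration"
}

func main() {
	optedIn := siteConfig{ModelConfiguration: &modelConfiguration{Sourcegraph: &struct{}{}}}
	legacy := siteConfig{Completions: &struct{}{}}
	fmt.Println(modelSource(optedIn)) // sourcegraph-supplied defaults (models.json)
	fmt.Println(modelSource(legacy))  // user-supplied / legacy completions defaults
}
```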

## What about Cody Free or Cody Pro?

😬 yeah, we're going to need to deal with that later. Currently,
Sourcegraph.com is _not_ using the newer "modelConfiguration" site
configuration; instead, we have some hacks in the code that ignore the
internal modelconfig. See this "super-shady hack":

e5178a6bc0/cmd/frontend/internal/httpapi/completions/get_model.go (L425-L455)

So we are just erring on the side of having Cody Free / Cody Pro "do
whatever they do now", and this PR won't have any impact on that.

We _do_ want Sourcegraph.com to only return this data, but there are a
few things we need to get straightened out first (e.g. Cody Gateway
being aware of mrefs, and Cody clients no longer using `dotcom.ts`
to hard-code the Cody Pro LLM models).
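
For background, the mrefs mentioned here are the three-part model
references used throughout this PR, of the form
`provider::apiVersion::model` (e.g. `fireworks::v1::starcoder`). A tiny
parsing sketch, separate from the repo's real `types.ModelRef` handling:

```go
package main

import (
	"fmt"
	"strings"
)

// parseMRef splits a model reference ("mref") of the form
// "provider::apiVersion::model", e.g. "fireworks::v1::starcoder".
// Illustrative only; the repo's types.ModelRef has its own parsing.
func parseMRef(mref string) (provider, apiVersion, model string, err error) {
	parts := strings.Split(mref, "::")
	if len(parts) != 3 {
		return "", "", "", fmt.Errorf("malformed mref: %q", mref)
	}
	return parts[0], parts[1], parts[2], nil
}

func main() {
	p, v, m, err := parseMRef("anthropic::2023-06-01::claude-3.5-sonnet")
	if err != nil {
		panic(err)
	}
	fmt.Println(p, v, m) // anthropic 2023-06-01 claude-3.5-sonnet
}
```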

## What does this PR actually _do_?

1. It updates the code in `cmd/cody-gateway-config` so that it will
produce a new "supported-models.json" file.
2. I then ran the tool manually and wrote its output to
`internal/modelconfig/embedded/models.json`.
3. That's it.

For any Sourcegraph releases after this PR gets merged, the "Sourcegraph
supplied LLM models" will be the newer set defined in `models.json`.
(i.e. with these new defaults, including "fireworks::v1::starcoder".)

## Test plan

~~I tested things locally, and unfortunately it doesn't look like any
clients are filtering based on the model capabilities. So "StarCoder" is
showing up in the Cody Web UI, despite failing at runtime.~~

Update: This was a problem on my end. This isn't an issue.
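
For the record, the capability filtering that clients are expected to
do looks roughly like this. This is a minimal sketch with made-up
types, not the actual client code:

```go
package main

import "fmt"

// model is a minimal stand-in for the real model types.
type model struct {
	MRef         string
	Capabilities []string
}

// chatModels keeps only models that advertise the "chat" capability,
// which is what should keep an autocomplete-only model like
// "fireworks::v1::starcoder" out of the chat UI.
func chatModels(all []model) []model {
	var out []model
	for _, m := range all {
		for _, c := range m.Capabilities {
			if c == "chat" {
				out = append(out, m)
				break
			}
		}
	}
	return out
}

func main() {
	all := []model{
		{MRef: "anthropic::2023-06-01::claude-3.5-sonnet", Capabilities: []string{"autocomplete", "chat"}},
		{MRef: "fireworks::v1::starcoder", Capabilities: []string{"autocomplete"}},
	}
	fmt.Println(chatModels(all)) // only the claude model remains
}
```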


## Changelog

NA?

6 changed files with 80 additions and 14 deletions


```diff
@@ -18,6 +18,9 @@ var (
 		types.ModelCapabilityAutocomplete,
 		types.ModelCapabilityChat,
 	}
+	editOnly = []types.ModelCapability{
+		types.ModelCapabilityAutocomplete,
+	}

 	// Standard context window sizes.
 	standardCtxWindow = types.ContextWindow{
@@ -145,6 +148,42 @@ func getAnthropicModels() []types.Model {
 	}
 }
+
+func getFireworksModels() []types.Model {
+	// https://docs.fireworks.ai/api-reference/post-completions
+	const fireworksV1 = "fireworks::v1"
+	return []types.Model{
+		// https://huggingface.co/blog/starcoder
+		newModel(
+			modelIdentity{
+				MRef: mRef(fireworksV1, "starcoder"),
+				// NOTE: This model name is virtualized.
+				//
+				// When Cody Gateway receives a request using model
+				// "fireworks/starcoder", it will then pick a specialized
+				// model name such as "starcoder2-15b" or "starcoder-7b".
+				Name:        "starcoder",
+				DisplayName: "StarCoder",
+			},
+			modelMetadata{
+				Capabilities: editOnly,
+				Category:     types.ModelCategorySpeed,
+				Status:       types.ModelStatusStable,
+				Tier:         types.ModelTierPro,
+			},
+			types.ContextWindow{
+				// These values are much lower than other, text-centric models. We are
+				// erring on the side of matching the token limits defined on the client
+				// today. (And maybe the StarCoder is able to use a more efficient
+				// tokenizer, because it's not processing many languages.)
+				// https://github.com/sourcegraph/cody/blob/066d9c6ff48beb96a834f17021affc4e62094415/vscode/src/completions/providers/fireworks.ts#L132
+				// https://github.com/sourcegraph/cody/blob/066d9c6ff48beb96a834f17021affc4e62094415/vscode/src/completions/providers/get-completion-params.ts#L5
+				MaxInputTokens:  2048,
+				MaxOutputTokens: 256,
+			}),
+	}
+}

 func getGoogleModels() []types.Model {
 	const (
 		// Gemini API versions.
@@ -279,6 +318,7 @@ func GetCodyFreeProModels() ([]types.Model, error) {
 	// ================================================
 	var allModels []types.Model
 	allModels = append(allModels, getAnthropicModels()...)
+	allModels = append(allModels, getFireworksModels()...)
 	allModels = append(allModels, getGoogleModels()...)
 	allModels = append(allModels, getMistralModels()...)
 	allModels = append(allModels, getOpenAIModels()...)
```
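
A note on the "virtualized" model name in the new `getFireworksModels`
code above: Cody Gateway resolves the generic "starcoder" name to a
concrete deployment. Something in this spirit, purely illustrative (the
actual gateway routing logic is not part of this diff):

```go
package gateway

// concreteStarCoderModel is purely illustrative of the "virtualized"
// name idea; the real Cody Gateway selection logic is not in this PR.
// It picks a specialized deployment for the generic "starcoder" name.
func concreteStarCoderModel(preferSmall bool) string {
	if preferSmall {
		return "starcoder-7b" // smaller/faster variant
	}
	return "starcoder2-15b" // larger default variant
}
```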


```diff
@@ -27,6 +27,11 @@ var (
 func main() {
 	flag.Parse()

+	liblog := log.Init(log.Resource{
+		Name: "Cody Gateway Configuration App",
+	})
+	defer liblog.Sync()
+
 	logger := log.Scoped("cody-gateway-config")

 	// Generate the configuration.
@@ -88,9 +93,9 @@ func GenerateModelConfigurationDoc() (*types.ModelConfiguration, error) {
 		Models: dotcomModels,

 		DefaultModels: types.DefaultModels{
-			Chat:           types.ModelRef("anthropic::2023-06-01::claude-3-sonnet"),
-			CodeCompletion: types.ModelRef("anthropic::2023-06-01::claude-3-sonnet"),
-			FastChat:       types.ModelRef("anthropic::2023-06-01::claude-3-sonnet"),
+			Chat:           types.ModelRef("anthropic::2023-06-01::claude-3.5-sonnet"),
+			CodeCompletion: types.ModelRef("fireworks::v1::starcoder"),
+			FastChat:       types.ModelRef("anthropic::2023-06-01::claude-3-haiku"),
 		},
 	}
```


```diff
@@ -12,9 +12,13 @@ func GetProviders() ([]types.Provider, error) {
 	// ================================================
 	allProviders := []types.Provider{
 		newProvider("anthropic", "Anthropic"),
+		newProvider("fireworks", "Fireworks"),
 		newProvider("google", "Google"),
-		newProvider("mistral", "Mistral"),
 		newProvider("openai", "OpenAI"),
+
+		// Special case, as mistral models will get
+		// served via our Fireworks integration.
+		newProvider("mistral", "Mistral"),
 	}

 	// Validate the Provider data.
```


```diff
@@ -263,7 +263,7 @@ func TestCodyProModelAllowlists(t *testing.T) {
 			}
 		}
 		if !supportsChat {
-			t.Logf("NA. Skipping model %q as it does not support chat.", sourcegraphSuppliedModel.ModelRef)
+			t.Skipf("NA. Skipping model %q as it does not support chat.", sourcegraphSuppliedModel.ModelRef)
 		}

 		legacyModelRef := toLegacyMRef(sourcegraphSuppliedModel.ModelRef)
```


```diff
@@ -6,17 +6,21 @@
       "id": "anthropic",
       "displayName": "Anthropic"
     },
+    {
+      "id": "fireworks",
+      "displayName": "Fireworks"
+    },
     {
       "id": "google",
       "displayName": "Google"
     },
+    {
+      "id": "mistral",
+      "displayName": "Mistral"
+    },
     {
       "id": "openai",
       "displayName": "OpenAI"
-    },
-    {
-      "id": "mistral",
-      "displayName": "Mistral"
     }
   ],
   "models": [
@@ -111,6 +115,19 @@
         "maxOutputTokens": 4000
       }
     },
+    {
+      "modelRef": "fireworks::v1::starcoder",
+      "displayName": "StarCoder",
+      "modelName": "starcoder",
+      "capabilities": ["autocomplete"],
+      "category": "speed",
+      "status": "stable",
+      "tier": "pro",
+      "contextWindow": {
+        "maxInputTokens": 2048,
+        "maxOutputTokens": 256
+      }
+    },
     {
       "modelRef": "google::v1::gemini-1.5-pro-latest",
       "displayName": "Gemini 1.5 Pro",
@@ -204,8 +221,8 @@
     }
   ],
   "defaultModels": {
-    "chat": "anthropic::2023-06-01::claude-3-sonnet",
-    "fastChat": "anthropic::2023-06-01::claude-3-sonnet",
-    "codeCompletion": "anthropic::2023-06-01::claude-3-sonnet"
+    "chat": "anthropic::2023-06-01::claude-3.5-sonnet",
+    "fastChat": "anthropic::2023-06-01::claude-3-haiku",
+    "codeCompletion": "fireworks::v1::starcoder"
   }
 }
```


```diff
@@ -163,10 +163,10 @@ func ValidateModelConfig(cfg *types.ModelConfiguration) error {
 		return errors.Errorf("unknown chat model %q", cfg.DefaultModels.Chat)
 	}
 	if !isKnownModel(cfg.DefaultModels.CodeCompletion) {
-		return errors.Errorf("unknown chat model %q", cfg.DefaultModels.CodeCompletion)
+		return errors.Errorf("unknown code completion model %q", cfg.DefaultModels.CodeCompletion)
 	}
 	if !isKnownModel(cfg.DefaultModels.FastChat) {
-		return errors.Errorf("unknown chat model %q", cfg.DefaultModels.FastChat)
+		return errors.Errorf("unknown fast chat model %q", cfg.DefaultModels.FastChat)
 	}

 	return nil
```