Update the default Sourcegraph-supplied LLM models (#64281)

This PR sets the defaults for "Sourcegraph-supplied LLM models".

## When will these "defaults" be used?

These models will _only_ be used if the Sourcegraph instance is
_explicitly_ using the newer "modelConfiguration" site configuration
data _and_ opts into using Sourcegraph-supplied LLM models.

If the Sourcegraph instance is using the older "completions"
configuration blob, then _only_ the user-supplied models will be used
(or the specific defaults defined in the code for that completions
provider).
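
To make the precedence concrete, here is a minimal sketch in Go. The
type and field names here are hypothetical stand-ins, not the actual
site-config schema; it only illustrates the "newer config wins, legacy
blob otherwise" rule described above:

```go
package main

import "fmt"

// Hypothetical, simplified shapes; the real site-config schema in the
// Sourcegraph repo is richer than this sketch.
type modelConfiguration struct {
	// Non-nil means the instance opted into Sourcegraph-supplied models.
	Sourcegraph *struct{}
}

type siteConfig struct {
	ModelConfiguration *modelConfiguration // newer-style config
	Completions        *struct{}           // older-style "completions" blob
}

// modelSource mirrors the precedence described above.
func modelSource(c siteConfig) string {
	if c.ModelConfiguration != nil && c.ModelConfiguration.Sourcegraph != nil {
		// Newer config + opt-in: the embedded models.json defaults apply.
		return "sourcegraph-supplied defaults (models.json)"
	}
	if c.Completions != nil {
		// Legacy blob: only user-supplied models, or the defaults
		// hard-coded for that completions provider.
		return "user-supplied / legacy completions defaults"
	}
	return "no Cody model configuration"
}

func main() {
	optedIn := siteConfig{ModelConfiguration: &modelConfiguration{Sourcegraph: &struct{}{}}}
	legacy := siteConfig{Completions: &struct{}{}}
	fmt.Println(modelSource(optedIn)) // sourcegraph-supplied defaults (models.json)
	fmt.Println(modelSource(legacy))  // user-supplied / legacy completions defaults
}
```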

## What about Cody Free or Cody Pro?

😬 yeah, we're going to need to deal with that later. Currently,
Sourcegraph.com is _not_ using the newer "modelConfiguration" site
configuration; instead, we have some hacks in the code that ignore the
internal modelconfig. See this "super-shady hack":

e5178a6bc0/cmd/frontend/internal/httpapi/completions/get_model.go (L425-L455)

So we are just erring on the side of having Cody Free / Cody Pro "do
whatever they do now", and this PR won't have any impact on that.

We _do_ want Sourcegraph.com to only return this data, but there are a
few things we need to get straightened out first (e.g. Cody Gateway
being aware of mrefs, and Cody clients no longer using `dotcom.ts`
to hard-code the Cody Pro LLM models).
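
For background, the mrefs mentioned here are the three-part model
references used throughout this PR, of the form
`provider::apiVersion::model` (e.g. `fireworks::v1::starcoder`). A tiny
parsing sketch, separate from the repo's real `types.ModelRef` handling:

```go
package main

import (
	"fmt"
	"strings"
)

// parseMRef splits a model reference ("mref") of the form
// "provider::apiVersion::model", e.g. "fireworks::v1::starcoder".
// Illustrative only; the repo's types.ModelRef has its own parsing.
func parseMRef(mref string) (provider, apiVersion, model string, err error) {
	parts := strings.Split(mref, "::")
	if len(parts) != 3 {
		return "", "", "", fmt.Errorf("malformed mref: %q", mref)
	}
	return parts[0], parts[1], parts[2], nil
}

func main() {
	p, v, m, err := parseMRef("anthropic::2023-06-01::claude-3.5-sonnet")
	if err != nil {
		panic(err)
	}
	fmt.Println(p, v, m) // anthropic 2023-06-01 claude-3.5-sonnet
}
```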

## What does this PR actually _do_?

1. It updates the code in `cmd/cody-gateway-config` so that it will
produce a new "supported-models.json" file.
2. I then ran the tool manually and wrote its output to
`internal/modelconfig/embedded/models.json`.
3. That's it.

For any Sourcegraph releases after this PR gets merged, the "Sourcegraph
supplied LLM models" will be the newer set defined in `models.json`.
(i.e. with these new defaults, including "fireworks::v1::starcoder".)

## Test plan

~~I tested things locally, and unfortunately it doesn't look like any
clients are filtering based on the model capabilities. So "StarCoder" is
showing up in the Cody Web UI, despite failing at runtime.~~

Update: This was a problem on my end. This isn't an issue.
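
For the record, the capability filtering that clients are expected to
do looks roughly like this. This is a minimal sketch with made-up
types, not the actual client code:

```go
package main

import "fmt"

// model is a minimal stand-in for the real model types.
type model struct {
	MRef         string
	Capabilities []string
}

// chatModels keeps only models that advertise the "chat" capability,
// which is what should keep an autocomplete-only model like
// "fireworks::v1::starcoder" out of the chat UI.
func chatModels(all []model) []model {
	var out []model
	for _, m := range all {
		for _, c := range m.Capabilities {
			if c == "chat" {
				out = append(out, m)
				break
			}
		}
	}
	return out
}

func main() {
	all := []model{
		{MRef: "anthropic::2023-06-01::claude-3.5-sonnet", Capabilities: []string{"autocomplete", "chat"}},
		{MRef: "fireworks::v1::starcoder", Capabilities: []string{"autocomplete"}},
	}
	fmt.Println(chatModels(all)) // only the claude model remains
}
```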


## Changelog

NA?

6 changed files with 80 additions and 14 deletions


```diff
@@ -18,6 +18,9 @@ var (
 		types.ModelCapabilityAutocomplete,
 		types.ModelCapabilityChat,
 	}
+	editOnly = []types.ModelCapability{
+		types.ModelCapabilityAutocomplete,
+	}

 	// Standard context window sizes.
 	standardCtxWindow = types.ContextWindow{
@@ -145,6 +148,42 @@ func getAnthropicModels() []types.Model {
 	}
 }
+
+func getFireworksModels() []types.Model {
+	// https://docs.fireworks.ai/api-reference/post-completions
+	const fireworksV1 = "fireworks::v1"
+	return []types.Model{
+		// https://huggingface.co/blog/starcoder
+		newModel(
+			modelIdentity{
+				MRef: mRef(fireworksV1, "starcoder"),
+				// NOTE: This model name is virtualized.
+				//
+				// When Cody Gateway receives a request using model
+				// "fireworks/starcoder", it will then pick a specialized
+				// model name such as "starcoder2-15b" or "starcoder-7b".
+				Name:        "starcoder",
+				DisplayName: "StarCoder",
+			},
+			modelMetadata{
+				Capabilities: editOnly,
+				Category:     types.ModelCategorySpeed,
+				Status:       types.ModelStatusStable,
+				Tier:         types.ModelTierPro,
+			},
+			types.ContextWindow{
+				// These values are much lower than other, text-centric models. We are
+				// erring on the side of matching the token limits defined on the client
+				// today. (And maybe the StarCoder is able to use a more efficient
+				// tokenizer, because it's not processing many languages.)
+				// https://github.com/sourcegraph/cody/blob/066d9c6ff48beb96a834f17021affc4e62094415/vscode/src/completions/providers/fireworks.ts#L132
+				// https://github.com/sourcegraph/cody/blob/066d9c6ff48beb96a834f17021affc4e62094415/vscode/src/completions/providers/get-completion-params.ts#L5
+				MaxInputTokens:  2048,
+				MaxOutputTokens: 256,
+			}),
+	}
+}

 func getGoogleModels() []types.Model {
 	const (
 		// Gemini API versions.
@@ -279,6 +318,7 @@ func GetCodyFreeProModels() ([]types.Model, error) {
 	// ================================================
 	var allModels []types.Model
 	allModels = append(allModels, getAnthropicModels()...)
+	allModels = append(allModels, getFireworksModels()...)
 	allModels = append(allModels, getGoogleModels()...)
 	allModels = append(allModels, getMistralModels()...)
 	allModels = append(allModels, getOpenAIModels()...)
```
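
A note on the "virtualized" model name in the new `getFireworksModels`
code above: Cody Gateway resolves the generic "starcoder" name to a
concrete deployment. Something in this spirit, purely illustrative (the
actual gateway routing logic is not part of this diff):

```go
package gateway

// concreteStarCoderModel is purely illustrative of the "virtualized"
// name idea; the real Cody Gateway selection logic is not in this PR.
// It picks a specialized deployment for the generic "starcoder" name.
func concreteStarCoderModel(preferSmall bool) string {
	if preferSmall {
		return "starcoder-7b" // smaller/faster variant
	}
	return "starcoder2-15b" // larger default variant
}
```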


```diff
@@ -27,6 +27,11 @@ var (
 func main() {
 	flag.Parse()

+	liblog := log.Init(log.Resource{
+		Name: "Cody Gateway Configuration App",
+	})
+	defer liblog.Sync()
+
 	logger := log.Scoped("cody-gateway-config")

 	// Generate the configuration.
@@ -88,9 +93,9 @@ func GenerateModelConfigurationDoc() (*types.ModelConfiguration, error) {
 		Models: dotcomModels,

 		DefaultModels: types.DefaultModels{
-			Chat:           types.ModelRef("anthropic::2023-06-01::claude-3-sonnet"),
-			CodeCompletion: types.ModelRef("anthropic::2023-06-01::claude-3-sonnet"),
-			FastChat:       types.ModelRef("anthropic::2023-06-01::claude-3-sonnet"),
+			Chat:           types.ModelRef("anthropic::2023-06-01::claude-3.5-sonnet"),
+			CodeCompletion: types.ModelRef("fireworks::v1::starcoder"),
+			FastChat:       types.ModelRef("anthropic::2023-06-01::claude-3-haiku"),
 		},
 	}
```


```diff
@@ -12,9 +12,13 @@ func GetProviders() ([]types.Provider, error) {
 	// ================================================
 	allProviders := []types.Provider{
 		newProvider("anthropic", "Anthropic"),
+		newProvider("fireworks", "Fireworks"),
 		newProvider("google", "Google"),
-		newProvider("mistral", "Mistral"),
 		newProvider("openai", "OpenAI"),
+
+		// Special case, as mistral models will get
+		// served via our Fireworks integration.
+		newProvider("mistral", "Mistral"),
 	}

 	// Validate the Provider data.
```


```diff
@@ -263,7 +263,7 @@ func TestCodyProModelAllowlists(t *testing.T) {
 			}
 		}
 		if !supportsChat {
-			t.Logf("NA. Skipping model %q as it does not support chat.", sourcegraphSuppliedModel.ModelRef)
+			t.Skipf("NA. Skipping model %q as it does not support chat.", sourcegraphSuppliedModel.ModelRef)
 		}

 		legacyModelRef := toLegacyMRef(sourcegraphSuppliedModel.ModelRef)
```


```diff
@@ -6,17 +6,21 @@
       "id": "anthropic",
       "displayName": "Anthropic"
     },
+    {
+      "id": "fireworks",
+      "displayName": "Fireworks"
+    },
     {
       "id": "google",
       "displayName": "Google"
     },
+    {
+      "id": "mistral",
+      "displayName": "Mistral"
+    },
     {
       "id": "openai",
       "displayName": "OpenAI"
-    },
-    {
-      "id": "mistral",
-      "displayName": "Mistral"
     }
   ],
   "models": [
@@ -111,6 +115,19 @@
         "maxOutputTokens": 4000
       }
     },
+    {
+      "modelRef": "fireworks::v1::starcoder",
+      "displayName": "StarCoder",
+      "modelName": "starcoder",
+      "capabilities": ["autocomplete"],
+      "category": "speed",
+      "status": "stable",
+      "tier": "pro",
+      "contextWindow": {
+        "maxInputTokens": 2048,
+        "maxOutputTokens": 256
+      }
+    },
     {
       "modelRef": "google::v1::gemini-1.5-pro-latest",
       "displayName": "Gemini 1.5 Pro",
@@ -204,8 +221,8 @@
     }
   ],
   "defaultModels": {
-    "chat": "anthropic::2023-06-01::claude-3-sonnet",
-    "fastChat": "anthropic::2023-06-01::claude-3-sonnet",
-    "codeCompletion": "anthropic::2023-06-01::claude-3-sonnet"
+    "chat": "anthropic::2023-06-01::claude-3.5-sonnet",
+    "fastChat": "anthropic::2023-06-01::claude-3-haiku",
+    "codeCompletion": "fireworks::v1::starcoder"
   }
 }
```


```diff
@@ -163,10 +163,10 @@ func ValidateModelConfig(cfg *types.ModelConfiguration) error {
 		return errors.Errorf("unknown chat model %q", cfg.DefaultModels.Chat)
 	}
 	if !isKnownModel(cfg.DefaultModels.CodeCompletion) {
-		return errors.Errorf("unknown chat model %q", cfg.DefaultModels.CodeCompletion)
+		return errors.Errorf("unknown code completion model %q", cfg.DefaultModels.CodeCompletion)
 	}
 	if !isKnownModel(cfg.DefaultModels.FastChat) {
-		return errors.Errorf("unknown chat model %q", cfg.DefaultModels.FastChat)
+		return errors.Errorf("unknown fast chat model %q", cfg.DefaultModels.FastChat)
 	}

 	return nil
```