embeddings: replace newlines for OpenAI models only (#52183)

We replace newlines with spaces for OpenAI embedding models only. This
allows using other (OSS) models which do not have such requirements.

From OpenAI's Python lib:
https://sourcegraph.com/github.com/openai/openai-python/-/blob/openai/embeddings_utils.py?L46


## Test plan

* Manual testing
This commit is contained in:
Rok Novosel 2023-05-19 11:59:41 +02:00 committed by GitHub
parent db2fee355b
commit 08bce28d41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -82,11 +82,19 @@ func (c *embeddingsClient) GetEmbeddingsWithRetries(ctx context.Context, texts [
return nil, err
}
// MODELS_WITHOUT_NEWLINES is the set of embedding model names whose input
// texts have their newlines replaced with spaces before the embedding request
// is built. GetEmbeddings looks up config.Model in this set; a model that is
// not listed here receives its input texts unmodified.
var MODELS_WITHOUT_NEWLINES = map[string]struct{}{
"text-embedding-ada-002": {},
}
func GetEmbeddings(ctx context.Context, texts []string, config *schema.Embeddings) ([]float32, error) {
// Replace newlines, which can negatively affect performance.
augmentedTexts := make([]string, len(texts))
for idx, text := range texts {
augmentedTexts[idx] = strings.ReplaceAll(text, "\n", " ")
_, replaceNewlines := MODELS_WITHOUT_NEWLINES[config.Model]
augmentedTexts := texts
if replaceNewlines {
augmentedTexts = make([]string, len(texts))
// Replace newlines for certain (OpenAI) models, because they can negatively affect performance.
for idx, text := range texts {
augmentedTexts[idx] = strings.ReplaceAll(text, "\n", " ")
}
}
request := EmbeddingAPIRequest{Model: config.Model, Input: augmentedTexts}