chore(worker): move llm token counter to worker (#64008)

This moves the LLM token counter from `frontend` to `worker`.

Test plan:
new unit test
This commit is contained in:
Stefan Hengl 2024-07-24 10:03:33 +02:00 committed by GitHub
parent 40a436d159
commit d3528061cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 163 additions and 29 deletions

View File

@ -6,20 +6,16 @@ go_library(
srcs = [
"check_redis_cache_eviction_policy.go",
"doc.go",
"store_token_usage_in_postgres.go",
"update_permissions.go",
],
importpath = "github.com/sourcegraph/sourcegraph/cmd/frontend/internal/bg",
visibility = ["//cmd/frontend:__subpackages__"],
deps = [
"//internal/collections",
"//internal/completions/tokenusage",
"//internal/database",
"//internal/rbac",
"//internal/rbac/types",
"//internal/redispool",
"//internal/telemetry",
"//internal/telemetry/telemetryrecorder",
"//internal/types",
"//lib/errors",
"@com_github_gomodule_redigo//redis",

View File

@ -213,7 +213,6 @@ func Main(ctx context.Context, observationCtx *observation.Context, ready servic
goroutine.Go(func() { bg.UpdatePermissions(ctx, logger, db) })
// Recurring
goroutine.Go(func() { bg.ScheduleStoreTokenUsage(ctx, db) })
goroutine.Go(func() { updatecheck.Start(logger, db) })
goroutine.Go(func() { adminanalytics.StartAnalyticsCacheRefresh(context.Background(), db) })
goroutine.Go(func() { users.StartUpdateAggregatedUsersStatisticsTable(context.Background(), db) })

View File

@ -0,0 +1,39 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("//dev:go_defs.bzl", "go_test")
# Go library for the worker's completions package (job.go and tokens.go).
# Visible only to packages under //cmd/worker.
go_library(
name = "completions",
srcs = [
"job.go",
"tokens.go",
],
importpath = "github.com/sourcegraph/sourcegraph/cmd/worker/internal/completions",
visibility = ["//cmd/worker:__subpackages__"],
deps = [
"//cmd/worker/job",
"//cmd/worker/shared/init/db",
"//internal/completions/tokenusage",
"//internal/database",
"//internal/env",
"//internal/goroutine",
"//internal/metrics",
"//internal/observation",
"//internal/telemetry",
"//internal/telemetry/telemetryrecorder",
],
)
# Unit tests for the completions library; the test sources are compiled
# together with the library via `embed`.
go_test(
name = "completions_test",
srcs = ["tokens_test.go"],
embed = [":completions"],
tags = ["requires-network"],
deps = [
"//internal/completions/tokenusage",
"//internal/rcache",
"//internal/telemetry",
"//internal/telemetry/telemetrytest",
"//lib/telemetrygateway/v1:telemetrygateway",
"@com_github_stretchr_testify//require",
],
)

View File

@ -0,0 +1,67 @@
package completions
import (
"context"
"time"
"github.com/sourcegraph/sourcegraph/cmd/worker/job"
"github.com/sourcegraph/sourcegraph/internal/completions/tokenusage"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/telemetry/telemetryrecorder"
workerdb "github.com/sourcegraph/sourcegraph/cmd/worker/shared/init/db"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// tokenUsageJob is the worker job that stores LLM token usage in the DB.
// It carries no state of its own.
type tokenUsageJob struct{}
// NewTokenUsageJob returns the worker job that stores LLM token usage in
// the DB.
func NewTokenUsageJob() job.Job {
	return new(tokenUsageJob)
}
// Description returns a short, human-readable summary of what the job does.
func (tokenUsageJob) Description() string {
	const description = "stores LLM token usage in DB"
	return description
}
// Config returns the environment configuration required by the job.
// The job has none, so the result is always nil.
func (tokenUsageJob) Config() []env.Config {
	var none []env.Config
	return none
}
// Routines initializes the worker database handle and returns the single
// background routine that makes up this job. The context argument is
// unused. An error is returned only when the database cannot be initialized.
func (tokenUsageJob) Routines(_ context.Context, observationCtx *observation.Context) ([]goroutine.BackgroundRoutine, error) {
	db, err := workerdb.InitDB(observationCtx)
	if err != nil {
		return nil, err
	}

	routine := newTokenUsageJob(observationCtx, db)
	return []goroutine.BackgroundRoutine{routine}, nil
}
// newTokenUsageJob builds the periodic routine that calls recordTokenUsage
// every five minutes. Each run is wrapped in the
// "cody.llmTokenCounter.record" operation, which reports RED metrics under
// the "cody_llm_token_counter" prefix.
func newTokenUsageJob(observationCtx *observation.Context, db database.DB) goroutine.BackgroundRoutine {
	// Shared by the metric prefix and the routine name.
	const routineName = "cody_llm_token_counter"

	redMetrics := metrics.NewREDMetrics(
		observationCtx.Registerer,
		routineName,
		metrics.WithCountHelp("Total number of cody_llm_token_counter executions"),
	)
	op := observationCtx.Operation(observation.Op{
		Name:    "cody.llmTokenCounter.record",
		Metrics: redMetrics,
	})

	// A fresh token manager and telemetry recorder are built on every run.
	record := func(ctx context.Context) error {
		manager := tokenusage.NewManager()
		recorder := telemetryrecorder.New(db)
		return recordTokenUsage(ctx, manager, recorder)
	}

	return goroutine.NewPeriodicGoroutine(
		context.Background(),
		goroutine.HandlerFunc(record),
		goroutine.WithName(routineName),
		goroutine.WithDescription("Stores LLM token usage in DB"),
		goroutine.WithInterval(5*time.Minute),
		goroutine.WithOperation(op),
	)
}

View File

@ -1,31 +1,13 @@
package bg
package completions
import (
"context"
"fmt"
"time"
"github.com/sourcegraph/sourcegraph/internal/completions/tokenusage"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/telemetry"
"github.com/sourcegraph/sourcegraph/internal/telemetry/telemetryrecorder"
)
// ScheduleStoreTokenUsage calls storeTokenUsageinDb immediately and then
// again five minutes after each run finishes. It blocks until ctx is
// cancelled, so callers are expected to run it in its own goroutine.
//
// A failed run is printed to standard output and does not stop the loop.
func ScheduleStoreTokenUsage(ctx context.Context, db database.DB) {
	const interval = 5 * time.Minute

	for {
		if err := storeTokenUsageinDb(ctx, db); err != nil {
			fmt.Printf("Error storing token usage: %v\n", err)
		}

		// Wait for the next execution, but give up as soon as ctx is
		// cancelled. A plain time.Sleep would keep this goroutine alive
		// (and retrying with a dead context) after cancellation.
		timer := time.NewTimer(interval)
		select {
		case <-ctx.Done():
			timer.Stop()
			return
		case <-timer.C:
		}
	}
}
func storeTokenUsageinDb(ctx context.Context, db database.DB) error {
recorder := telemetryrecorder.New(db)
tokenManager := tokenusage.NewManager()
func recordTokenUsage(ctx context.Context, tokenManager *tokenusage.Manager, recorder *telemetry.EventRecorder) error {
tokenUsageData, err := tokenManager.FetchTokenUsageDataForAnalysis()
if err != nil {
return err

View File

@ -0,0 +1,40 @@
package completions
import (
"context"
"testing"
"github.com/stretchr/testify/require"
"github.com/sourcegraph/sourcegraph/internal/completions/tokenusage"
"github.com/sourcegraph/sourcegraph/internal/rcache"
"github.com/sourcegraph/sourcegraph/internal/telemetry"
"github.com/sourcegraph/sourcegraph/internal/telemetry/telemetrytest"
v1 "github.com/sourcegraph/sourcegraph/lib/telemetrygateway/v1"
)
// TestStoreTokenUsageInDB checks that recordTokenUsage reads the token
// counters held by the manager's cache and hands them to the events store
// as the metadata of a single "cody.llmTokenCounter" event.
func TestStoreTokenUsageInDB(t *testing.T) {
	// Seed the cache behind the manager with one input and one output counter.
	kv := rcache.SetupForTest(t)
	cache := rcache.NewWithTTL(kv, "LLMUsage", 1800)
	cache.SetInt("LLMUsage:model1:feature1:stream:input", 10)
	cache.SetInt("LLMUsage:model1:feature1:stream:output", 20)
	manager := tokenusage.NewManagerWithCache(cache)

	// Capture whatever the recorder passes to the events store.
	mockEventStore := telemetrytest.NewMockEventsStore()
	var sentEvents []*v1.Event
	mockEventStore.StoreEventsFunc.SetDefaultHook(func(_ context.Context, events []*v1.Event) error {
		sentEvents = events
		return nil
	})
	recorder := telemetry.NewEventRecorder(mockEventStore)

	err := recordTokenUsage(context.Background(), manager, recorder)
	require.NoError(t, err)

	// testify takes (expected, actual); keeping that order makes failure
	// output label the two values correctly.
	require.Len(t, sentEvents, 1)
	require.Equal(t, "cody.llmTokenCounter", sentEvents[0].Feature)
	require.Equal(t, map[string]float64{
		"LLMUsage:model1:feature1:stream:input":  10,
		"LLMUsage:model1:feature1:stream:output": 20,
		"FinalFetchAndSync":                      0.0,
	}, sentEvents[0].Parameters.Metadata)
}

View File

@ -35,13 +35,13 @@ func (e eventLogsJob) Routines(_ context.Context, observationCtx *observation.Co
}
return []goroutine.BackgroundRoutine{
NewEventLogsJob(observationCtx, db),
NewSecurityEventLogsJob(observationCtx, db),
newEventLogsJob(observationCtx, db),
newSecurityEventLogsJob(observationCtx, db),
},
nil
}
func NewEventLogsJob(observationCtx *observation.Context, db database.DB) goroutine.BackgroundRoutine {
func newEventLogsJob(observationCtx *observation.Context, db database.DB) goroutine.BackgroundRoutine {
handler := goroutine.HandlerFunc(func(ctx context.Context) error {
return deleteOldEventLogsInPostgres(ctx, db)
})
@ -65,7 +65,7 @@ func NewEventLogsJob(observationCtx *observation.Context, db database.DB) gorout
)
}
func NewSecurityEventLogsJob(observationCtx *observation.Context, db database.DB) goroutine.BackgroundRoutine {
func newSecurityEventLogsJob(observationCtx *observation.Context, db database.DB) goroutine.BackgroundRoutine {
handler := goroutine.HandlerFunc(func(ctx context.Context) error {
return deleteOldSecurityEventLogsInPostgres(ctx, db)
})

View File

@ -16,6 +16,7 @@ go_library(
"//cmd/worker/internal/codeintel",
"//cmd/worker/internal/codemonitors",
"//cmd/worker/internal/codygateway",
"//cmd/worker/internal/completions",
"//cmd/worker/internal/embeddings/repo",
"//cmd/worker/internal/encryption",
"//cmd/worker/internal/eventlogs",

View File

@ -17,6 +17,7 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/worker/internal/codeintel"
"github.com/sourcegraph/sourcegraph/cmd/worker/internal/codemonitors"
"github.com/sourcegraph/sourcegraph/cmd/worker/internal/codygateway"
"github.com/sourcegraph/sourcegraph/cmd/worker/internal/completions"
repoembeddings "github.com/sourcegraph/sourcegraph/cmd/worker/internal/embeddings/repo"
"github.com/sourcegraph/sourcegraph/cmd/worker/internal/encryption"
"github.com/sourcegraph/sourcegraph/cmd/worker/internal/eventlogs"
@ -106,6 +107,7 @@ func LoadConfig(registerEnterpriseMigrators oobmigration.RegisterMigratorsFunc)
"export-usage-telemetry": telemetry.NewTelemetryJob(),
"telemetrygateway-exporter": telemetrygatewayexporter.NewJob(),
"event-logs-janitor": eventlogs.NewEventLogsJanitorJob(),
"cody-llm-token-counter": completions.NewTokenUsageJob(),
"codeintel-policies-repository-matcher": codeintel.NewPoliciesRepositoryMatcherJob(),
"codeintel-autoindexing-summary-builder": codeintel.NewAutoindexingSummaryBuilder(),

View File

@ -9,6 +9,7 @@ go_library(
visibility = [
"//cmd/cody-gateway:__subpackages__",
"//cmd/frontend/internal/bg:__pkg__",
"//cmd/worker/internal/completions:__pkg__",
"//internal/completions/client:__pkg__",
"//internal/completions/client/anthropic:__pkg__",
"//internal/completions/client/awsbedrock:__pkg__",

View File

@ -24,6 +24,12 @@ func NewManager() *Manager {
}
}
// NewManagerWithCache returns a Manager that uses the supplied cache
// instead of one it creates itself.
func NewManagerWithCache(cache *rcache.Cache) *Manager {
	manager := new(Manager)
	manager.cache = cache
	return manager
}
type Provider string
const (

View File

@ -44,6 +44,7 @@ go_library(
"//cmd/telemetry-gateway/shared:__pkg__",
"//cmd/telemetrygateway/server:__pkg__",
"//cmd/telemetrygateway/shared:__pkg__",
"//cmd/worker/internal/completions:__pkg__",
"//internal/api:__pkg__",
"//internal/database:__pkg__",
"//internal/database/dbmocks:__pkg__",