2022-06-23 12:35:39 +00:00
|
|
|
# Documentation for how to override sg configuration for local development:
|
|
|
|
|
# https://github.com/sourcegraph/sourcegraph/blob/main/doc/dev/background-information/sg/index.md#configuration
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
2021-07-21 15:37:24 +00:00
|
|
|
PGPORT: 5432
|
|
|
|
|
PGHOST: localhost
|
|
|
|
|
PGUSER: sourcegraph
|
|
|
|
|
PGPASSWORD: sourcegraph
|
|
|
|
|
PGDATABASE: sourcegraph
|
|
|
|
|
PGSSLMODE: disable
|
2021-12-10 13:59:33 +00:00
|
|
|
SG_DEV_MIGRATE_ON_APPLICATION_STARTUP: 'true'
|
2022-04-29 19:13:57 +00:00
|
|
|
INSECURE_DEV: true
|
2022-01-06 11:49:45 +00:00
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos
|
|
|
|
|
SRC_LOG_LEVEL: info
|
|
|
|
|
SRC_LOG_FORMAT: condensed
|
2022-04-29 15:40:09 +00:00
|
|
|
SRC_TRACE_LOG: false
|
2022-02-01 20:55:40 +00:00
|
|
|
# Set this to true to show an iTerm link to the file:line where the log message came from
|
|
|
|
|
SRC_LOG_SOURCE_LINK: false
|
2022-10-10 13:26:58 +00:00
|
|
|
|
|
|
|
|
# Use two gitserver instances in local dev
|
|
|
|
|
SRC_GIT_SERVER_1: 127.0.0.1:3501
|
|
|
|
|
SRC_GIT_SERVER_2: 127.0.0.1:3502
|
|
|
|
|
SRC_GIT_SERVERS: 127.0.0.1:3501 127.0.0.1:3502
|
2021-03-29 08:14:53 +00:00
|
|
|
|
|
|
|
|
# Enable sharded indexed search mode:
|
|
|
|
|
INDEXED_SEARCH_SERVERS: localhost:3070 localhost:3071
|
|
|
|
|
|
2021-08-06 08:51:09 +00:00
|
|
|
GO111MODULE: 'on'
|
|
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
DEPLOY_TYPE: dev
|
|
|
|
|
|
2021-05-31 07:51:39 +00:00
|
|
|
SRC_HTTP_ADDR: ':3082'
|
2021-03-29 08:14:53 +00:00
|
|
|
|
|
|
|
|
GITHUB_BASE_URL: http://127.0.0.1:3180
|
|
|
|
|
# I don't think we even need to set these?
|
|
|
|
|
SEARCHER_URL: http://127.0.0.1:3181
|
|
|
|
|
REPO_UPDATER_URL: http://127.0.0.1:3182
|
|
|
|
|
REDIS_ENDPOINT: 127.0.0.1:6379
|
|
|
|
|
SYMBOLS_URL: http://localhost:3184
|
embeddings: searcher and indexer (#48017)
# High-level architecture overview
<img width="2231" alt="Screenshot 2023-02-24 at 15 13 59"
src="https://user-images.githubusercontent.com/6417322/221200130-53c1ff25-4c47-4532-885f-5c4f9dadb05e.png">
# Embeddings
Really quickly: embeddings are a semantic representation of text.
Embeddings are usually floating-point vectors with 256+ elements. The
neat thing about embeddings is that they allow us to search over textual
information using a semantic correlation between the query and the text,
not just syntactic (matching keywords).
In this PR, we implemented an embedding service that will allow us to do
semantic code search over repositories in Sourcegraph. So, for example,
you'll be able to ask, "how do access tokens work in Sourcegraph", and
it will give you a list of the closest matching code files.
Additionally, we build a context detection service powered by
embeddings. In chat applications, it is important to know whether the
user's message requires additional context. We have to differentiate
between two cases: the user asks a general question about the codebase,
or the user references something in the existing conversation. In the
latter case, including the context would ruin the flow of the
conversation, and the chatbot would most likely return a confusing
answer. We determine whether a query _does not_ require additional
context using two approaches:
1. We check if the query contains well-known phrases that would indicate
the user is referencing the existing conversation (e.g., translate
previous, change that)
1. We have a static dataset of messages that require context and a
dataset of messages that do not. We embed both datasets, and then, using
embedding similarity, we can check which set is more similar to the
query.
## GraphQL API
We add four new resolvers to the GraphQL API:
```graphql
extend type Query {
embeddingsSearch(repo: ID!, query: String!, codeResultsCount: Int!, textResultsCount: Int!): EmbeddingsSearchResults!
isContextRequiredForQuery(query: String!): Boolean!
}
extend type Mutation {
scheduleRepositoriesForEmbedding(repoNames: [String!]!): EmptyResponse!
scheduleContextDetectionForEmbedding: EmptyResponse!
}
```
- `embeddingsSearch` performs embeddings search over the repo embeddings
and returns the specified number of results
- `isContextRequiredForQuery` determines whether the given query
requires additional context
- `scheduleRepositoriesForEmbedding` schedules a repo embedding
background job
- `scheduleContextDetectionForEmbedding` schedules a context detection
embedding background job that embeds a static dataset of messages.
## Repo embedding background job
Embedding a repository is implemented as a background job. The
background job handler receives the repository and the revision, which
should be embedded. The handler then gathers a list of files from the
gitserver and excludes files >1MB in size. The list of files is split
into code and text files (.md, .txt), and we build a separate embedding
index for both. We split them because in a combined index, the text
files always tended to feature as top results and didn't leave any room
for code files. Once we have the list of files, the procedure is as
follows:
- For each file
- Get file contents from gitserver
- Check if the file is embeddable (is not autogenerated, is large
enough, does not have long lines)
- Split the file into embeddable chunks
- Embed the file chunks using an external embedding service (defined in
site config)
- Add embedded file chunks and metadata to the index
- Metadata contains the file name, the start line, and the end line of
the chunk
- Once all files are processed, the index is marshaled into JSON and
stored in Cloud storage (GCS, S3)
### Site config changes
As mentioned, we use a configurable external embedding API that does the
actual text -> vector embedding part. Ideally, this allows us to swap
embedding providers in the future.
```json
"embeddings": {
"description": "Configuration for embeddings service.",
"type": "object",
"required": ["enabled", "dimensions", "model", "accessToken", "url"],
"properties": {
"enabled": {
"description": "Toggles whether embedding service is enabled.",
"type": "boolean",
"default": false
},
"dimensions": {
"description": "The dimensionality of the embedding vectors.",
"type": "integer",
"minimum": 0
},
"model": {
"description": "The model used for embedding.",
"type": "string"
},
"accessToken": {
"description": "The access token used to authenticate with the external embedding API service.",
"type": "string"
},
"url": {
"description": "The url to the external embedding API service.",
"type": "string",
"format": "uri"
}
}
}
```
## Repo embeddings search
The repo embeddings search is implemented in its own service. When a
user queries a repo using embeddings search, the following happens:
- Download the repo embedding index from blob storage and cache it in
memory
- We cache up to 5 embedding indexes in memory
- Embed the query and use the embedded query vector to find similar code
and text file metadata in the embedding index
- Query gitserver for the actual file contents
- Return the results
## Interesting files
- [Similarity
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-102cc83520004eb0e2795e49bc435c5142ca555189b1db3a52bbf1ffb82fa3c6)
- [Repo embedding job
handler](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-c345f373f426398beb4b9cd5852ba862a2718687882db2a8b2d9c7fbb5f1dc52)
- [External embedding api
client](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-ad1e7956f518e4bcaee17dd9e7ac04a5e090c00d970fcd273919e887e1d2cf8f)
- [Embedding a
repo](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-1f35118727128095b7816791b6f0a2e0e060cddee43d25102859b8159465585c)
- [Embeddings searcher
service](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-5b20f3e7ef87041daeeaef98b58ebf7388519cedcdfc359dc5e6d4e0b021472e)
- [Embeddings
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-79f95b9cc3f1ef39c1a0b88015bd9cd6c19c30a8d4c147409f1b8e8cd9462ea1)
- [Repo embedding index cache
management](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-8a41f7dec31054889dbf86e97c52223d5636b4d408c6b375bcfc09160a8b70f8)
- [GraphQL
resolvers](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-9b30a0b5efcb63e2f4611b99ab137fbe09629a769a4f30d10a1b2da41a01d21f)
## Test plan
- Start by filling out the `embeddings` object in the site config (let
me know if you need an API key)
- Start the embeddings service using `sg start embeddings`
- Go to the `/api/console` page and schedule a repo embedding job and a
context detection embedding job:
```graphql
mutation {
scheduleRepositoriesForEmbedding(repoNames: ["github.com/sourcegraph/handbook"]) {
__typename
}
scheduleContextDetectionForEmbedding {
__typename
}
}
```
- Once both are finished, you should be able to query the repo embedding
index, and determine whether context is needed for a given query:
```graphql
query {
isContextRequiredForQuery(query: "how do access tokens work")
embeddingsSearch(
repo: "UmVwb3NpdG9yeToy", # github.com/sourcegraph/handbook GQL ID
query: "how do access tokens work",
codeResultsCount: 5,
textResultsCount: 5) {
codeResults {
fileName
content
}
textResults {
fileName
content
}
}
}
```
2023-03-01 09:50:12 +00:00
|
|
|
EMBEDDINGS_URL: http://localhost:9991
|
2021-03-29 08:14:53 +00:00
|
|
|
SRC_SYNTECT_SERVER: http://localhost:9238
|
|
|
|
|
SRC_FRONTEND_INTERNAL: localhost:3090
|
|
|
|
|
GRAFANA_SERVER_URL: http://localhost:3370
|
|
|
|
|
PROMETHEUS_URL: http://localhost:9090
|
|
|
|
|
JAEGER_SERVER_URL: http://localhost:16686
|
|
|
|
|
|
2022-04-21 19:58:50 +00:00
|
|
|
SRC_DEVELOPMENT: 'true'
|
2021-07-05 10:24:20 +00:00
|
|
|
SRC_PROF_HTTP: ''
|
|
|
|
|
SRC_PROF_SERVICES: |
|
|
|
|
|
[
|
2022-06-20 17:57:54 +00:00
|
|
|
{ "Name": "oss-frontend", "Host": "127.0.0.1:6063" },
|
2021-07-05 10:24:20 +00:00
|
|
|
{ "Name": "frontend", "Host": "127.0.0.1:6063" },
|
2022-10-10 13:26:58 +00:00
|
|
|
{ "Name": "gitserver-0", "Host": "127.0.0.1:3551" },
|
|
|
|
|
{ "Name": "gitserver-1", "Host": "127.0.0.1:3552" },
|
2022-12-28 08:44:07 +00:00
|
|
|
{ "Name": "oss-gitserver-0", "Host": "127.0.0.1:3551" },
|
|
|
|
|
{ "Name": "oss-gitserver-1", "Host": "127.0.0.1:3552" },
|
2021-07-05 10:24:20 +00:00
|
|
|
{ "Name": "searcher", "Host": "127.0.0.1:6069" },
|
2022-07-27 15:21:29 +00:00
|
|
|
{ "Name": "oss-symbols", "Host": "127.0.0.1:6071" },
|
2021-07-05 10:24:20 +00:00
|
|
|
{ "Name": "symbols", "Host": "127.0.0.1:6071" },
|
2022-06-20 17:57:54 +00:00
|
|
|
{ "Name": "oss-repo-updater", "Host": "127.0.0.1:6074" },
|
2021-07-05 10:24:20 +00:00
|
|
|
{ "Name": "repo-updater", "Host": "127.0.0.1:6074" },
|
2022-06-20 17:57:54 +00:00
|
|
|
{ "Name": "codeintel-worker", "Host": "127.0.0.1:6088" },
|
|
|
|
|
{ "Name": "oss-worker", "Host": "127.0.0.1:6089" },
|
2022-10-31 17:22:27 +00:00
|
|
|
{ "Name": "github-proxy", "Host": "127.0.0.1:6090" },
|
2021-07-05 10:24:20 +00:00
|
|
|
{ "Name": "worker", "Host": "127.0.0.1:6089" },
|
2022-08-03 10:08:04 +00:00
|
|
|
{ "Name": "worker-executors", "Host": "127.0.0.1:6996" },
|
2022-06-20 17:57:54 +00:00
|
|
|
{ "Name": "zoekt-index-0", "Host": "127.0.0.1:6072" },
|
|
|
|
|
{ "Name": "zoekt-index-1", "Host": "127.0.0.1:6073" },
|
|
|
|
|
{ "Name": "zoekt-web-0", "Host": "127.0.0.1:3070", "DefaultPath": "/debug/requests/" },
|
|
|
|
|
{ "Name": "zoekt-web-1", "Host": "127.0.0.1:3071", "DefaultPath": "/debug/requests/" }
|
2021-07-05 10:24:20 +00:00
|
|
|
]
|
2021-03-29 08:14:53 +00:00
|
|
|
# Settings/config
|
|
|
|
|
SITE_CONFIG_FILE: ./dev/site-config.json
|
|
|
|
|
SITE_CONFIG_ALLOW_EDITS: true
|
|
|
|
|
GLOBAL_SETTINGS_FILE: ./dev/global-settings.json
|
|
|
|
|
GLOBAL_SETTINGS_ALLOW_EDITS: true
|
|
|
|
|
|
2021-04-14 10:53:56 +00:00
|
|
|
# Point codeintel to the `frontend` database in development
|
|
|
|
|
CODEINTEL_PGPORT: $PGPORT
|
|
|
|
|
CODEINTEL_PGHOST: $PGHOST
|
|
|
|
|
CODEINTEL_PGUSER: $PGUSER
|
|
|
|
|
CODEINTEL_PGPASSWORD: $PGPASSWORD
|
|
|
|
|
CODEINTEL_PGDATABASE: $PGDATABASE
|
|
|
|
|
CODEINTEL_PGSSLMODE: $PGSSLMODE
|
|
|
|
|
CODEINTEL_PGDATASOURCE: $PGDATASOURCE
|
|
|
|
|
CODEINTEL_PG_ALLOW_SINGLE_DB: true
|
|
|
|
|
|
2021-05-07 11:01:10 +00:00
|
|
|
# Required for `frontend` and `web` commands
|
|
|
|
|
SOURCEGRAPH_HTTPS_DOMAIN: sourcegraph.test
|
|
|
|
|
SOURCEGRAPH_HTTPS_PORT: 3443
|
|
|
|
|
|
|
|
|
|
# Required for `web` commands
|
2021-10-20 14:07:10 +00:00
|
|
|
NODE_OPTIONS: '--max_old_space_size=8192'
|
2021-05-07 11:01:10 +00:00
|
|
|
# Default `NODE_ENV` to `development`
|
|
|
|
|
NODE_ENV: development
|
|
|
|
|
|
2021-05-31 07:51:39 +00:00
|
|
|
# Required for codeintel uploadstore
|
|
|
|
|
PRECISE_CODE_INTEL_UPLOAD_AWS_ENDPOINT: http://localhost:9000
|
2022-11-30 23:04:48 +00:00
|
|
|
PRECISE_CODE_INTEL_UPLOAD_BACKEND: blobstore
|
2021-05-31 07:51:39 +00:00
|
|
|
|
2023-04-03 15:50:06 +00:00
|
|
|
# Required for embeddings job upload
|
|
|
|
|
EMBEDDINGS_UPLOAD_AWS_ENDPOINT: http://localhost:9000
|
|
|
|
|
|
2021-05-31 07:51:39 +00:00
|
|
|
# Disable auto-indexing the CNCF repo group (this only works in Cloud)
|
|
|
|
|
# This setting will be going away soon
|
|
|
|
|
DISABLE_CNCF: notonmybox
|
|
|
|
|
|
2022-12-06 03:41:14 +00:00
|
|
|
# Point code insights to the `frontend` database in development
|
|
|
|
|
CODEINSIGHTS_PGPORT: $PGPORT
|
|
|
|
|
CODEINSIGHTS_PGHOST: $PGHOST
|
|
|
|
|
CODEINSIGHTS_PGUSER: $PGUSER
|
|
|
|
|
CODEINSIGHTS_PGPASSWORD: $PGPASSWORD
|
|
|
|
|
CODEINSIGHTS_PGDATABASE: $PGDATABASE
|
|
|
|
|
CODEINSIGHTS_PGSSLMODE: $PGSSLMODE
|
|
|
|
|
CODEINSIGHTS_PGDATASOURCE: $PGDATASOURCE
|
|
|
|
|
|
|
|
|
|
# Disable code insights by default
|
2021-05-21 16:01:11 +00:00
|
|
|
DB_STARTUP_TIMEOUT: 120s # codeinsights-db needs more time to start in some instances.
|
|
|
|
|
DISABLE_CODE_INSIGHTS_HISTORICAL: true
|
|
|
|
|
DISABLE_CODE_INSIGHTS: true
|
|
|
|
|
|
2022-08-02 14:09:41 +00:00
|
|
|
# # OpenTelemetry in dev - use single http/json endpoint
|
|
|
|
|
# OTEL_EXPORTER_OTLP_ENDPOINT: http://127.0.0.1:4318
|
|
|
|
|
# OTEL_EXPORTER_OTLP_PROTOCOL: http/json
|
|
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
commands:
|
2022-07-06 17:09:41 +00:00
|
|
|
server:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Run an all-in-one sourcegraph/server image
|
2022-07-06 17:09:41 +00:00
|
|
|
cmd: ./dev/run-server-image.sh
|
|
|
|
|
env:
|
|
|
|
|
TAG: insiders
|
|
|
|
|
CLEAN: 'true'
|
|
|
|
|
DATA: '/tmp/sourcegraph-data'
|
|
|
|
|
URL: 'http://localhost:7080'
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
oss-frontend:
|
|
|
|
|
cmd: .bin/oss-frontend
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-06-20 17:57:54 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/oss-frontend github.com/sourcegraph/sourcegraph/cmd/frontend
|
2022-07-07 13:08:06 +00:00
|
|
|
checkBinary: .bin/oss-frontend
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
|
|
|
|
CONFIGURATION_MODE: server
|
|
|
|
|
USE_ENHANCED_LANGUAGE_DETECTION: false
|
2021-04-12 12:14:41 +00:00
|
|
|
# frontend processes need this to be set so that the paths to the assets are rendered correctly
|
|
|
|
|
WEBPACK_DEV_SERVER: 1
|
2021-03-29 08:14:53 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/frontend
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
frontend:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Enterprise frontend
|
2021-04-01 11:59:48 +00:00
|
|
|
cmd: |
|
|
|
|
|
# TODO: This should be fixed
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
2022-07-06 21:38:47 +00:00
|
|
|
# If EXTSVC_CONFIG_FILE is *unset*, set a default.
|
|
|
|
|
export EXTSVC_CONFIG_FILE=${EXTSVC_CONFIG_FILE-'../dev-private/enterprise/dev/external-services-config.json'}
|
|
|
|
|
|
2022-07-27 15:21:29 +00:00
|
|
|
.bin/frontend
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-07-27 15:21:29 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/frontend github.com/sourcegraph/sourcegraph/enterprise/cmd/frontend
|
|
|
|
|
checkBinary: .bin/frontend
|
2021-04-01 11:59:48 +00:00
|
|
|
env:
|
|
|
|
|
CONFIGURATION_MODE: server
|
|
|
|
|
USE_ENHANCED_LANGUAGE_DETECTION: false
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
SITE_CONFIG_FILE: '../dev-private/enterprise/dev/site-config.json'
|
2021-09-27 14:34:19 +00:00
|
|
|
SITE_CONFIG_ESCAPE_HATCH_PATH: '$HOME/.sourcegraph/site-config.json'
|
2021-04-12 12:14:41 +00:00
|
|
|
# frontend processes need this to be set so that the paths to the assets are rendered correctly
|
|
|
|
|
WEBPACK_DEV_SERVER: 1
|
2021-03-29 08:14:53 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/frontend
|
2021-05-31 07:51:39 +00:00
|
|
|
- enterprise/internal
|
2021-03-29 08:14:53 +00:00
|
|
|
- enterprise/cmd/frontend
|
|
|
|
|
|
2022-10-10 13:26:58 +00:00
|
|
|
gitserver-template: &gitserver_template
|
2021-03-29 08:14:53 +00:00
|
|
|
cmd: .bin/gitserver
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-12-28 08:44:07 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/gitserver github.com/sourcegraph/sourcegraph/enterprise/cmd/gitserver
|
2021-06-02 09:38:06 +00:00
|
|
|
checkBinary: .bin/gitserver
|
2022-10-10 13:26:58 +00:00
|
|
|
env: &gitserverenv
|
2021-03-29 08:14:53 +00:00
|
|
|
HOSTNAME: 127.0.0.1:3178
|
|
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/gitserver
|
2022-12-28 08:44:07 +00:00
|
|
|
- enterprise/internal
|
|
|
|
|
- enterprise/cmd/gitserver
|
|
|
|
|
|
|
|
|
|
oss-gitserver-template: &oss_gitserver_template
|
|
|
|
|
cmd: .bin/oss-gitserver
|
|
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/oss-gitserver github.com/sourcegraph/sourcegraph/cmd/gitserver
|
|
|
|
|
checkBinary: .bin/oss-gitserver
|
|
|
|
|
env: &oss_gitserverenv
|
|
|
|
|
HOSTNAME: 127.0.0.1:3178
|
|
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- cmd/gitserver
|
|
|
|
|
|
|
|
|
|
oss-gitserver-0:
|
|
|
|
|
<<: *oss_gitserver_template
|
|
|
|
|
env:
|
|
|
|
|
<<: *gitserverenv
|
2023-02-03 12:32:08 +00:00
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3501
|
2022-12-28 08:44:07 +00:00
|
|
|
GITSERVER_ADDR: 127.0.0.1:3501
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_1
|
|
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3551
|
|
|
|
|
|
|
|
|
|
oss-gitserver-1:
|
|
|
|
|
<<: *oss_gitserver_template
|
|
|
|
|
env:
|
|
|
|
|
<<: *oss_gitserverenv
|
2023-02-03 12:32:08 +00:00
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3502
|
2022-12-28 08:44:07 +00:00
|
|
|
GITSERVER_ADDR: 127.0.0.1:3502
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2
|
|
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3552
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2022-10-10 13:26:58 +00:00
|
|
|
# This is only here to stay backwards-compatible with people's custom
|
|
|
|
|
# `sg.config.overwrite.yaml` files
|
|
|
|
|
gitserver:
|
|
|
|
|
<<: *gitserver_template
|
|
|
|
|
|
|
|
|
|
gitserver-0:
|
|
|
|
|
<<: *gitserver_template
|
|
|
|
|
env:
|
|
|
|
|
<<: *gitserverenv
|
2023-02-03 12:32:08 +00:00
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3501
|
2022-10-10 13:26:58 +00:00
|
|
|
GITSERVER_ADDR: 127.0.0.1:3501
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_1
|
2022-10-31 17:22:27 +00:00
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3551
|
2022-10-10 13:26:58 +00:00
|
|
|
|
|
|
|
|
gitserver-1:
|
|
|
|
|
<<: *gitserver_template
|
|
|
|
|
env:
|
|
|
|
|
<<: *gitserverenv
|
2023-02-03 12:32:08 +00:00
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3502
|
2022-10-10 13:26:58 +00:00
|
|
|
GITSERVER_ADDR: 127.0.0.1:3502
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2
|
2022-10-31 17:22:27 +00:00
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3552
|
2022-10-10 13:26:58 +00:00
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
github-proxy:
|
|
|
|
|
cmd: .bin/github-proxy
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/github-proxy github.com/sourcegraph/sourcegraph/cmd/github-proxy
|
2021-06-02 09:38:06 +00:00
|
|
|
checkBinary: .bin/github-proxy
|
2021-03-29 08:14:53 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
|
|
|
|
- internal
|
2021-03-29 08:14:53 +00:00
|
|
|
- cmd/github-proxy
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
oss-worker:
|
|
|
|
|
cmd: .bin/oss-worker
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-06-20 17:57:54 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/oss-worker github.com/sourcegraph/sourcegraph/cmd/worker
|
2021-06-04 19:48:13 +00:00
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- cmd/worker
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
worker:
|
2022-03-15 19:41:16 +00:00
|
|
|
cmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
.bin/worker
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/worker github.com/sourcegraph/sourcegraph/enterprise/cmd/worker
|
2021-06-04 19:48:13 +00:00
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- enterprise/internal
|
|
|
|
|
- cmd/worker
|
|
|
|
|
- enterprise/cmd/worker
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
oss-repo-updater:
|
|
|
|
|
cmd: .bin/oss-repo-updater
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-06-20 17:57:54 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/oss-repo-updater github.com/sourcegraph/sourcegraph/cmd/repo-updater
|
|
|
|
|
checkBinary: .bin/oss-repo-updater
|
2021-03-29 08:14:53 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/repo-updater
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
repo-updater:
|
2022-09-01 19:27:23 +00:00
|
|
|
cmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
.bin/repo-updater
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-07-27 15:21:29 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/repo-updater github.com/sourcegraph/sourcegraph/enterprise/cmd/repo-updater
|
|
|
|
|
checkBinary: .bin/repo-updater
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
2021-04-01 11:59:48 +00:00
|
|
|
ENTERPRISE: 1
|
2021-03-29 08:14:53 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/repo-updater
|
2021-05-31 07:51:39 +00:00
|
|
|
- enterprise/internal
|
2021-03-29 08:14:53 +00:00
|
|
|
- enterprise/cmd/repo-updater
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
oss-symbols:
|
|
|
|
|
cmd: .bin/oss-symbols
|
2021-03-29 08:14:53 +00:00
|
|
|
install: |
|
2021-10-09 01:47:08 +00:00
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
./cmd/symbols/build-ctags.sh &&
|
2022-06-20 17:57:54 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/oss-symbols github.com/sourcegraph/sourcegraph/cmd/symbols
|
2022-07-07 13:08:06 +00:00
|
|
|
checkBinary: .bin/oss-symbols
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
2022-12-06 17:32:21 +00:00
|
|
|
CTAGS_COMMAND: dev/universal-ctags-dev
|
2021-03-29 08:14:53 +00:00
|
|
|
CTAGS_PROCESSES: 2
|
|
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/symbols
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
symbols:
|
2022-07-27 15:21:29 +00:00
|
|
|
cmd: .bin/symbols
|
2022-03-03 04:13:28 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
./cmd/symbols/build-ctags.sh &&
|
2022-07-27 15:21:29 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/symbols github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols
|
|
|
|
|
checkBinary: .bin/symbols
|
2022-03-03 04:13:28 +00:00
|
|
|
env:
|
2022-12-06 17:32:21 +00:00
|
|
|
CTAGS_COMMAND: dev/universal-ctags-dev
|
2022-03-03 04:13:28 +00:00
|
|
|
CTAGS_PROCESSES: 2
|
|
|
|
|
USE_ROCKSKIP: 'false'
|
|
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- cmd/symbols
|
|
|
|
|
- enterprise/cmd/symbols
|
|
|
|
|
- enterprise/internal/rockskip
|
|
|
|
|
|
embeddings: searcher and indexer (#48017)
# High-level architecture overview
<img width="2231" alt="Screenshot 2023-02-24 at 15 13 59"
src="https://user-images.githubusercontent.com/6417322/221200130-53c1ff25-4c47-4532-885f-5c4f9dadb05e.png">
# Embeddings
Really quickly: embeddings are a semantic representation of text.
Embeddings are usually floating-point vectors with 256+ elements. The
neat thing about embeddings is that they allow us to search over textual
information using a semantic correlation between the query and the text,
not just syntactic (matching keywords).
In this PR, we implemented an embedding service that will allow us to do
semantic code search over repositories in Sourcegraph. So, for example,
you'll be able to ask, "how do access tokens work in Sourcegraph", and
it will give you a list of the closest matching code files.
Additionally, we build a context detection service powered by
embeddings. In chat applications, it is important to know whether the
user's message requires additional context. We have to differentiate
between two cases: the user asks a general question about the codebase,
or the user references something in the existing conversation. In the
latter case, including the context would ruin the flow of the
conversation, and the chatbot would most likely return a confusing
answer. We determine whether a query _does not_ require additional
context using two approaches:
1. We check if the query contains well-known phrases that would indicate
the user is referencing the existing conversation (e.g., translate
previous, change that)
1. We have a static dataset of messages that require context and a
dataset of messages that do not. We embed both datasets, and then, using
embedding similarity, we can check which set is more similar to the
query.
## GraphQL API
We add four new resolvers to the GraphQL API:
```graphql
extend type Query {
embeddingsSearch(repo: ID!, query: String!, codeResultsCount: Int!, textResultsCount: Int!): EmbeddingsSearchResults!
isContextRequiredForQuery(query: String!): Boolean!
}
extend type Mutation {
scheduleRepositoriesForEmbedding(repoNames: [String!]!): EmptyResponse!
scheduleContextDetectionForEmbedding: EmptyResponse!
}
```
- `embeddingsSearch` performs embeddings search over the repo embeddings
and returns the specified number of results
- `isContextRequiredForQuery` determines whether the given query
requires additional context
- `scheduleRepositoriesForEmbedding` schedules a repo embedding
background job
- `scheduleContextDetectionForEmbedding` schedules a context detection
embedding background job that embeds a static dataset of messages.
## Repo embedding background job
Embedding a repository is implemented as a background job. The
background job handler receives the repository and the revision, which
should be embedded. The handler then gathers a list of files from the
gitserver and excludes files >1MB in size. The list of files is split
into code and text files (.md, .txt), and we build a separate embedding
index for both. We split them because in a combined index, the text
files always tended to feature as top results and didn't leave any room
for code files. Once we have the list of files, the procedure is as
follows:
- For each file
- Get file contents from gitserver
- Check if the file is embeddable (is not autogenerated, is large
enough, does not have long lines)
- Split the file into embeddable chunks
- Embed the file chunks using an external embedding service (defined in
site config)
- Add embedded file chunks and metadata to the index
- Metadata contains the file name, the start line, and the end line of
the chunk
- Once all files are processed, the index is marshaled into JSON and
stored in Cloud storage (GCS, S3)
### Site config changes
As mentioned, we use a configurable external embedding API that does the
actual text -> vector embedding part. Ideally, this allows us to swap
embedding providers in the future.
```json
"embeddings": {
"description": "Configuration for embeddings service.",
"type": "object",
"required": ["enabled", "dimensions", "model", "accessToken", "url"],
"properties": {
"enabled": {
"description": "Toggles whether embedding service is enabled.",
"type": "boolean",
"default": false
},
"dimensions": {
"description": "The dimensionality of the embedding vectors.",
"type": "integer",
"minimum": 0
},
"model": {
"description": "The model used for embedding.",
"type": "string"
},
"accessToken": {
"description": "The access token used to authenticate with the external embedding API service.",
"type": "string"
},
"url": {
"description": "The url to the external embedding API service.",
"type": "string",
"format": "uri"
}
}
}
```
## Repo embeddings search
The repo embeddings search is implemented in its own service. When a
user queries a repo using embeddings search, the following happens:
- Download the repo embedding index from blob storage and cache it in
memory
- We cache up to 5 embedding indexes in memory
- Embed the query and use the embedded query vector to find similar code
and text file metadata in the embedding index
- Query gitserver for the actual file contents
- Return the results
## Interesting files
- [Similarity
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-102cc83520004eb0e2795e49bc435c5142ca555189b1db3a52bbf1ffb82fa3c6)
- [Repo embedding job
handler](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-c345f373f426398beb4b9cd5852ba862a2718687882db2a8b2d9c7fbb5f1dc52)
- [External embedding api
client](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-ad1e7956f518e4bcaee17dd9e7ac04a5e090c00d970fcd273919e887e1d2cf8f)
- [Embedding a
repo](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-1f35118727128095b7816791b6f0a2e0e060cddee43d25102859b8159465585c)
- [Embeddings searcher
service](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-5b20f3e7ef87041daeeaef98b58ebf7388519cedcdfc359dc5e6d4e0b021472e)
- [Embeddings
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-79f95b9cc3f1ef39c1a0b88015bd9cd6c19c30a8d4c147409f1b8e8cd9462ea1)
- [Repo embedding index cache
management](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-8a41f7dec31054889dbf86e97c52223d5636b4d408c6b375bcfc09160a8b70f8)
- [GraphQL
resolvers](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-9b30a0b5efcb63e2f4611b99ab137fbe09629a769a4f30d10a1b2da41a01d21f)
## Test plan
- Start by filling out the `embeddings` object in the site config (let
me know if you need an API key)
- Start the embeddings service using `sg start embeddings`
- Go to the `/api/console` page and schedule a repo embedding job and a
context detection embedding job:
```graphql
mutation {
scheduleRepositoriesForEmbedding(repoNames: ["github.com/sourcegraph/handbook"]) {
__typename
}
scheduleContextDetectionForEmbedding {
__typename
}
}
```
- Once both are finished, you should be able to query the repo embedding
index, and determine whether context is needed for a given query:
```graphql
query {
isContextRequiredForQuery(query: "how do access tokens work")
embeddingsSearch(
repo: "UmVwb3NpdG9yeToy", # github.com/sourcegraph/handbook GQL ID
query: "how do access tokens work",
codeResultsCount: 5,
textResultsCount: 5) {
codeResults {
fileName
content
}
textResults {
fileName
content
}
}
}
```
2023-03-01 09:50:12 +00:00
|
|
|
embeddings:
|
2023-03-24 17:11:38 +00:00
|
|
|
cmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
.bin/embeddings
|
embeddings: searcher and indexer (#48017)
# High-level architecture overview
<img width="2231" alt="Screenshot 2023-02-24 at 15 13 59"
src="https://user-images.githubusercontent.com/6417322/221200130-53c1ff25-4c47-4532-885f-5c4f9dadb05e.png">
# Embeddings
Really quickly: embeddings are a semantic representation of text.
Embeddings are usually floating-point vectors with 256+ elements. The
neat thing about embeddings is that they allow us to search over textual
information using a semantic correlation between the query and the text,
not just syntactic (matching keywords).
In this PR, we implemented an embedding service that will allow us to do
semantic code search over repositories in Sourcegraph. So, for example,
you'll be able to ask, "how do access tokens work in Sourcegraph", and
it will give you a list of the closest matching code files.
Additionally, we build a context detection service powered by
embeddings. In chat applications, it is important to know whether the
user's message requires additional context. We have to differentiate
between two cases: the user asks a general question about the codebase,
or the user references something in the existing conversation. In the
latter case, including the context would ruin the flow of the
conversation, and the chatbot would most likely return a confusing
answer. We determine whether a query _does not_ require additional
context using two approaches:
1. We check if the query contains well-known phrases that would indicate
the user is referencing the existing conversation (e.g., translate
previous, change that)
1. We have a static dataset of messages that require context and a
dataset of messages that do not. We embed both datasets, and then, using
embedding similarity, we can check which set is more similar to the
query.
## GraphQL API
We add four new resolvers to the GraphQL API:
```graphql
extend type Query {
embeddingsSearch(repo: ID!, query: String!, codeResultsCount: Int!, textResultsCount: Int!): EmbeddingsSearchResults!
isContextRequiredForQuery(query: String!): Boolean!
}
extend type Mutation {
scheduleRepositoriesForEmbedding(repoNames: [String!]!): EmptyResponse!
scheduleContextDetectionForEmbedding: EmptyResponse!
}
```
- `embeddingsSearch` performs embeddings search over the repo embeddings
and returns the specified number of results
- `isContextRequiredForQuery` determines whether the given query
requires additional context
- `scheduleRepositoriesForEmbedding` schedules a repo embedding
background job
- `scheduleContextDetectionForEmbedding` schedules a context detection
embedding background job that embeds a static dataset of messages.
## Repo embedding background job
Embedding a repository is implemented as a background job. The
background job handler receives the repository and the revision, which
should be embedded. The handler then gathers a list of files from the
gitserver and excludes files >1MB in size. The list of files is split
into code and text files (.md, .txt), and we build a separate embedding
index for both. We split them because in a combined index, the text
files always tended to feature as top results and didn't leave any room
for code files. Once we have the list of files, the procedure is as
follows:
- For each file
- Get file contents from gitserver
- Check if the file is embeddable (is not autogenerated, is large
enough, does not have long lines)
- Split the file into embeddable chunks
- Embed the file chunks using an external embedding service (defined in
site config)
- Add embedded file chunks and metadata to the index
- Metadata contains the file name, the start line, and the end line of
the chunk
- Once all files are processed, the index is marshaled into JSON and
stored in Cloud storage (GCS, S3)
### Site config changes
As mentioned, we use a configurable external embedding API that does the
actual text -> vector embedding part. Ideally, this allows us to swap
embedding providers in the future.
```json
"embeddings": {
"description": "Configuration for embeddings service.",
"type": "object",
"required": ["enabled", "dimensions", "model", "accessToken", "url"],
"properties": {
"enabled": {
"description": "Toggles whether embedding service is enabled.",
"type": "boolean",
"default": false
},
"dimensions": {
"description": "The dimensionality of the embedding vectors.",
"type": "integer",
"minimum": 0
},
"model": {
"description": "The model used for embedding.",
"type": "string"
},
"accessToken": {
"description": "The access token used to authenticate with the external embedding API service.",
"type": "string"
},
"url": {
"description": "The url to the external embedding API service.",
"type": "string",
"format": "uri"
}
}
}
```
## Repo embeddings search
The repo embeddings search is implemented in its own service. When a
user queries a repo using embeddings search, the following happens:
- Download the repo embedding index from blob storage and cache it in
memory
- We cache up to 5 embedding indexes in memory
- Embed the query and use the embedded query vector to find similar code
and text file metadata in the embedding index
- Query gitserver for the actual file contents
- Return the results
## Interesting files
- [Similarity
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-102cc83520004eb0e2795e49bc435c5142ca555189b1db3a52bbf1ffb82fa3c6)
- [Repo embedding job
handler](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-c345f373f426398beb4b9cd5852ba862a2718687882db2a8b2d9c7fbb5f1dc52)
- [External embedding api
client](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-ad1e7956f518e4bcaee17dd9e7ac04a5e090c00d970fcd273919e887e1d2cf8f)
- [Embedding a
repo](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-1f35118727128095b7816791b6f0a2e0e060cddee43d25102859b8159465585c)
- [Embeddings searcher
service](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-5b20f3e7ef87041daeeaef98b58ebf7388519cedcdfc359dc5e6d4e0b021472e)
- [Embeddings
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-79f95b9cc3f1ef39c1a0b88015bd9cd6c19c30a8d4c147409f1b8e8cd9462ea1)
- [Repo embedding index cache
management](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-8a41f7dec31054889dbf86e97c52223d5636b4d408c6b375bcfc09160a8b70f8)
- [GraphQL
resolvers](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-9b30a0b5efcb63e2f4611b99ab137fbe09629a769a4f30d10a1b2da41a01d21f)
## Test plan
- Start by filling out the `embeddings` object in the site config (let
me know if you need an API key)
- Start the embeddings service using `sg start embeddings`
- Go to the `/api/console` page and schedule a repo embedding job and a
context detection embedding job:
```graphql
mutation {
scheduleRepositoriesForEmbedding(repoNames: ["github.com/sourcegraph/handbook"]) {
__typename
}
scheduleContextDetectionForEmbedding {
__typename
}
}
```
- Once both are finished, you should be able to query the repo embedding
index, and determine whether context is needed for a given query:
```graphql
query {
isContextRequiredForQuery(query: "how do access tokens work")
embeddingsSearch(
repo: "UmVwb3NpdG9yeToy", # github.com/sourcegraph/handbook GQL ID
query: "how do access tokens work",
codeResultsCount: 5,
textResultsCount: 5) {
codeResults {
fileName
content
}
textResults {
fileName
content
}
}
}
```
2023-03-01 09:50:12 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/embeddings github.com/sourcegraph/sourcegraph/enterprise/cmd/embeddings
|
|
|
|
|
checkBinary: .bin/embeddings
|
|
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- enterprise/cmd/embeddings
|
|
|
|
|
- enterprise/internal/embeddings
|
|
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
searcher:
|
|
|
|
|
cmd: .bin/searcher
|
2021-10-09 01:47:08 +00:00
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/searcher github.com/sourcegraph/sourcegraph/cmd/searcher
|
2021-06-02 09:38:06 +00:00
|
|
|
checkBinary: .bin/searcher
|
2021-03-29 08:14:53 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-03-29 08:14:53 +00:00
|
|
|
- internal
|
|
|
|
|
- cmd/searcher
|
|
|
|
|
|
|
|
|
|
caddy:
|
2021-05-06 14:58:47 +00:00
|
|
|
ignoreStdout: true
|
|
|
|
|
ignoreStderr: true
|
2022-04-26 06:35:54 +00:00
|
|
|
cmd: .bin/caddy_${CADDY_VERSION} run --watch --config=dev/Caddyfile
|
2022-05-23 07:58:53 +00:00
|
|
|
install_func: installCaddy
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
2021-10-19 22:06:22 +00:00
|
|
|
CADDY_VERSION: 2.4.5
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
oss-web:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Open source version of the web app
|
2021-09-01 08:41:58 +00:00
|
|
|
cmd: ./node_modules/.bin/gulp --color dev
|
2023-01-12 03:50:09 +00:00
|
|
|
install: pnpm install
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
web:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Enterprise version of the web app
|
2021-09-01 08:41:58 +00:00
|
|
|
cmd: ./node_modules/.bin/gulp --color dev
|
2023-01-12 03:50:09 +00:00
|
|
|
install: pnpm install
|
2021-04-01 11:59:48 +00:00
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
2022-08-02 14:09:41 +00:00
|
|
|
ENABLE_OPEN_TELEMETRY: true
|
2021-05-07 11:01:10 +00:00
|
|
|
|
2021-10-22 10:30:29 +00:00
|
|
|
web-standalone-http:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Standalone web frontend (dev) with API proxy to a configurable URL
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/web serve:dev --color
|
2022-07-06 09:51:02 +00:00
|
|
|
install: |
|
2023-01-12 03:50:09 +00:00
|
|
|
pnpm install
|
|
|
|
|
pnpm generate
|
2021-05-07 11:01:10 +00:00
|
|
|
env:
|
|
|
|
|
WEBPACK_SERVE_INDEX: true
|
2022-01-28 14:15:56 +00:00
|
|
|
SOURCEGRAPH_API_URL: https://k8s.sgdev.org
|
2021-05-07 11:01:10 +00:00
|
|
|
|
2021-10-22 10:30:29 +00:00
|
|
|
web-standalone-http-prod:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Standalone web frontend (production) with API proxy to a configurable URL
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/web serve:prod
|
|
|
|
|
install: pnpm --filter @sourcegraph/web run build
|
2021-05-07 11:01:10 +00:00
|
|
|
env:
|
|
|
|
|
NODE_ENV: production
|
|
|
|
|
WEBPACK_SERVE_INDEX: true
|
|
|
|
|
SOURCEGRAPH_API_URL: https://k8s.sgdev.org
|
|
|
|
|
|
2022-09-09 09:36:10 +00:00
|
|
|
web-integration-build:
|
2023-01-30 06:51:24 +00:00
|
|
|
description: Build development web application for integration tests
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/web run build
|
2022-09-09 09:36:10 +00:00
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
INTEGRATION_TESTS: true
|
|
|
|
|
|
2023-01-30 06:51:24 +00:00
|
|
|
web-integration-build-prod:
|
|
|
|
|
description: Build production web application for integration tests
|
|
|
|
|
cmd: pnpm --filter @sourcegraph/web run build
|
|
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
INTEGRATION_TESTS: true
|
|
|
|
|
NODE_ENV: production
|
|
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
docsite:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Docsite instance serving the docs
|
2021-08-05 08:19:45 +00:00
|
|
|
cmd: .bin/docsite_${DOCSITE_VERSION} -config doc/docsite.json serve -http=localhost:5080
|
2022-07-26 14:51:04 +00:00
|
|
|
install_func: 'installDocsite'
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
2023-03-28 10:21:52 +00:00
|
|
|
DOCSITE_VERSION: v1.9.2 # Update in all places (including outside this repo)
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2021-09-28 04:18:12 +00:00
|
|
|
syntax-highlighter:
|
2021-05-06 14:58:47 +00:00
|
|
|
ignoreStdout: true
|
|
|
|
|
ignoreStderr: true
|
2021-03-29 08:14:53 +00:00
|
|
|
cmd: |
|
2021-09-28 04:18:12 +00:00
|
|
|
docker run --name=syntax-highlighter --rm -p9238:9238 \
|
2021-03-29 08:14:53 +00:00
|
|
|
-e WORKERS=1 -e ROCKET_ADDRESS=0.0.0.0 \
|
2022-03-01 18:19:16 +00:00
|
|
|
sourcegraph/syntax-highlighter:insiders
|
2021-09-28 04:18:12 +00:00
|
|
|
install: |
|
|
|
|
|
# Remove containers by the old name, too.
|
|
|
|
|
docker inspect syntect_server >/dev/null 2>&1 && docker rm -f syntect_server || true
|
|
|
|
|
docker inspect syntax-highlighter >/dev/null 2>&1 && docker rm -f syntax-highlighter || true
|
2022-12-05 13:06:34 +00:00
|
|
|
# Pull syntax-highlighter latest insider image, only during install, but
|
|
|
|
|
# skip if OFFLINE=true is set.
|
|
|
|
|
if [[ "$OFFLINE" != "true" ]]; then
|
|
|
|
|
docker pull -q sourcegraph/syntax-highlighter:insiders
|
|
|
|
|
fi
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2021-05-31 07:51:39 +00:00
|
|
|
zoekt-indexserver-template: &zoekt_indexserver_template
|
2021-03-29 08:14:53 +00:00
|
|
|
cmd: |
|
2021-08-20 08:42:05 +00:00
|
|
|
env PATH="${PWD}/.bin:$PATH" .bin/zoekt-sourcegraph-indexserver \
|
2021-03-29 08:14:53 +00:00
|
|
|
-sourcegraph_url 'http://localhost:3090' \
|
2021-05-21 16:01:11 +00:00
|
|
|
-index "$HOME/.sourcegraph/zoekt/index-$ZOEKT_NUM" \
|
|
|
|
|
-hostname "localhost:$ZOEKT_HOSTNAME_PORT" \
|
2021-03-29 08:14:53 +00:00
|
|
|
-interval 1m \
|
2022-05-06 11:37:11 +00:00
|
|
|
-listen "127.0.0.1:$ZOEKT_LISTEN_PORT" \
|
2021-03-29 08:14:53 +00:00
|
|
|
-cpu_fraction 0.25
|
|
|
|
|
install: |
|
|
|
|
|
mkdir -p .bin
|
|
|
|
|
export GOBIN="${PWD}/.bin"
|
2022-08-17 22:30:49 +00:00
|
|
|
go install github.com/sourcegraph/zoekt/cmd/zoekt-archive-index
|
|
|
|
|
go install github.com/sourcegraph/zoekt/cmd/zoekt-git-index
|
|
|
|
|
go install github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver
|
2021-06-02 09:38:06 +00:00
|
|
|
checkBinary: .bin/zoekt-sourcegraph-indexserver
|
2021-05-21 16:01:11 +00:00
|
|
|
env: &zoektenv
|
2022-12-06 17:32:21 +00:00
|
|
|
CTAGS_COMMAND: dev/universal-ctags-dev
|
2023-03-17 02:54:00 +00:00
|
|
|
GRPC_ENABLED: true
|
2021-04-01 11:59:48 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
zoekt-index-0:
|
2021-05-31 07:51:39 +00:00
|
|
|
<<: *zoekt_indexserver_template
|
2021-05-21 16:01:11 +00:00
|
|
|
env:
|
|
|
|
|
<<: *zoektenv
|
|
|
|
|
ZOEKT_NUM: 0
|
|
|
|
|
ZOEKT_HOSTNAME_PORT: 3070
|
|
|
|
|
ZOEKT_LISTEN_PORT: 6072
|
|
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
zoekt-index-1:
|
2021-05-31 07:51:39 +00:00
|
|
|
<<: *zoekt_indexserver_template
|
2021-03-29 08:14:53 +00:00
|
|
|
env:
|
2021-05-21 16:01:11 +00:00
|
|
|
<<: *zoektenv
|
|
|
|
|
ZOEKT_NUM: 1
|
|
|
|
|
ZOEKT_HOSTNAME_PORT: 3071
|
|
|
|
|
ZOEKT_LISTEN_PORT: 6073
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
zoekt-web-template: &zoekt_webserver_template
|
2021-03-29 08:14:53 +00:00
|
|
|
install: |
|
|
|
|
|
mkdir -p .bin
|
2022-08-17 22:30:49 +00:00
|
|
|
env GOBIN="${PWD}/.bin" go install github.com/sourcegraph/zoekt/cmd/zoekt-webserver
|
2021-06-02 09:38:06 +00:00
|
|
|
checkBinary: .bin/zoekt-webserver
|
2021-04-01 11:59:48 +00:00
|
|
|
env:
|
2022-08-29 16:42:36 +00:00
|
|
|
JAEGER_DISABLED: true
|
|
|
|
|
OPENTELEMETRY_DISABLED: false
|
2022-07-14 11:08:58 +00:00
|
|
|
GOGC: 25
|
2021-04-01 11:59:48 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
zoekt-web-0:
|
2021-05-31 07:51:39 +00:00
|
|
|
<<: *zoekt_webserver_template
|
2022-12-02 07:38:44 +00:00
|
|
|
cmd: env PATH="${PWD}/.bin:$PATH" .bin/zoekt-webserver -index "$HOME/.sourcegraph/zoekt/index-0" -pprof -rpc -indexserver_proxy -listen "127.0.0.1:3070"
|
2021-05-31 07:51:39 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
zoekt-web-1:
|
2021-05-31 07:51:39 +00:00
|
|
|
<<: *zoekt_webserver_template
|
2022-12-02 07:38:44 +00:00
|
|
|
cmd: env PATH="${PWD}/.bin:$PATH" .bin/zoekt-webserver -index "$HOME/.sourcegraph/zoekt/index-1" -pprof -rpc -indexserver_proxy -listen "127.0.0.1:3071"
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2022-06-20 17:57:54 +00:00
|
|
|
codeintel-worker:
|
2022-09-05 10:16:27 +00:00
|
|
|
cmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
.bin/codeintel-worker
|
2021-05-21 16:01:11 +00:00
|
|
|
install: |
|
2021-10-09 01:47:08 +00:00
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-08-04 21:21:45 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/codeintel-worker github.com/sourcegraph/sourcegraph/enterprise/cmd/precise-code-intel-worker
|
|
|
|
|
checkBinary: .bin/codeintel-worker
|
2021-05-21 16:01:11 +00:00
|
|
|
watch:
|
2021-05-31 07:51:39 +00:00
|
|
|
- lib
|
2021-05-21 16:01:11 +00:00
|
|
|
- internal
|
2021-05-31 07:51:39 +00:00
|
|
|
- enterprise/cmd/precise-code-intel-worker
|
2021-05-21 16:01:11 +00:00
|
|
|
- enterprise/internal
|
|
|
|
|
- lib/codeintel
|
|
|
|
|
|
2022-03-03 04:13:28 +00:00
|
|
|
executor-template:
|
|
|
|
|
&executor_template # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`.
|
2021-07-05 11:06:52 +00:00
|
|
|
cmd: |
|
|
|
|
|
env TMPDIR="$HOME/.sourcegraph/executor-temp" .bin/executor
|
2021-05-31 07:51:39 +00:00
|
|
|
install: |
|
2021-10-09 01:47:08 +00:00
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2022-10-05 17:26:28 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/executor github.com/sourcegraph/sourcegraph/enterprise/cmd/executor
|
2021-06-02 09:38:06 +00:00
|
|
|
checkBinary: .bin/executor
|
2021-05-21 16:01:11 +00:00
|
|
|
env:
|
2023-03-28 21:18:56 +00:00
|
|
|
# Required for frontend and executor to communicate
|
|
|
|
|
EXECUTOR_FRONTEND_URL: http://localhost:3080
|
|
|
|
|
# Must match the secret defined in the site config.
|
|
|
|
|
EXECUTOR_FRONTEND_PASSWORD: hunter2hunter2hunter2
|
|
|
|
|
# Disable firecracker inside executor in dev
|
|
|
|
|
EXECUTOR_USE_FIRECRACKER: false
|
2021-07-05 11:06:52 +00:00
|
|
|
EXECUTOR_QUEUE_NAME: TEMPLATE
|
2021-05-31 07:51:39 +00:00
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- enterprise/internal
|
|
|
|
|
- enterprise/cmd/executor
|
2021-05-21 16:01:11 +00:00
|
|
|
|
2021-07-05 11:06:52 +00:00
|
|
|
codeintel-executor:
|
|
|
|
|
<<: *executor_template
|
|
|
|
|
cmd: |
|
|
|
|
|
env TMPDIR="$HOME/.sourcegraph/indexer-temp" .bin/executor
|
|
|
|
|
env:
|
|
|
|
|
EXECUTOR_QUEUE_NAME: codeintel
|
|
|
|
|
|
2022-09-21 19:39:50 +00:00
|
|
|
# If you want to use this, either start it with `sg run codeintel-executor-firecracker` or
|
|
|
|
|
# add it to the relevant commandset in your local `sg.config.overwrite.yaml`
|
|
|
|
|
codeintel-executor-firecracker:
|
|
|
|
|
<<: *executor_template
|
|
|
|
|
cmd: |
|
|
|
|
|
env TMPDIR="$HOME/.sourcegraph/codeintel-executor-temp" \
|
2022-10-05 13:32:30 +00:00
|
|
|
sudo --preserve-env=TMPDIR,EXECUTOR_QUEUE_NAME,EXECUTOR_FRONTEND_URL,EXECUTOR_FRONTEND_PASSWORD,EXECUTOR_USE_FIRECRACKER \
|
2022-09-21 19:39:50 +00:00
|
|
|
.bin/executor
|
|
|
|
|
env:
|
|
|
|
|
EXECUTOR_USE_FIRECRACKER: true
|
|
|
|
|
EXECUTOR_QUEUE_NAME: codeintel
|
|
|
|
|
|
2021-07-05 11:06:52 +00:00
|
|
|
batches-executor:
|
|
|
|
|
<<: *executor_template
|
|
|
|
|
cmd: |
|
|
|
|
|
env TMPDIR="$HOME/.sourcegraph/batches-executor-temp" .bin/executor
|
|
|
|
|
env:
|
2021-09-22 10:03:57 +00:00
|
|
|
EXECUTOR_QUEUE_NAME: batches
|
2021-09-29 12:17:18 +00:00
|
|
|
EXECUTOR_MAXIMUM_NUM_JOBS: 8
|
2021-07-05 11:06:52 +00:00
|
|
|
|
Experiment: Natively run SSBC in docker (#44034)
This adds an experimental code path that I will use to test a docker-only execution mode for server-side batch changes. This code path is never executed for customers until we make the switch when we deem it ready. This will allow me to dogfood this while it's not available to customer instances yet.
Ultimately, the goal of this is to make executors simply be "the job runner platform through a generic interface". Today, this depends on src-cli to do a good bunch of the work. This is a blocker for going full docker-based with executors, which will ultimately be a requirement on the road to k8s-based executors.
As this removes the dependency on src-cli, nothing but the job interface and API endpoints tie executor and Sourcegraph instance together. Ultimately, this will allow us to support larger version spans between the two (pending executors going GA and being feature-complete).
Known issues/limitations:
Steps skipped in between steps that run don't work yet
Skipping steps dynamically is inefficient as we cannot tell the executor to skip a step IF X, so we replace the script by exit 0
It is unclear if all variants of file mounts still work. Basic cases do work. Files used to be read-only in src-cli, they aren't now, but content is still reset in between steps.
The assumption that everything operates in /work is broken here, because we need to use what executors give us to persist out-of-repo state in between containers (like the step result from the previous step)
It is unclear if workspace mounts work
Cache keys are not correctly computed if using workspace mounts - the metadataretriever is nil
We still use log outputs to transfer the AfterStepResults to the Sourcegraph instance, this should finally become an artifact instead. Then, we don't have to rely on the execution_log_entries anymore and can theoretically prune those after some time. This column is currently growing indefinitely.
It depends on tee being available in the docker images to capture the cmd.stdout/cmd.stderr properly for template variable rendering
Env-vars are not rendered in their evaluated form post-execution
File permissions are unclear and might be similarly broken to how they are now - or even worse
Disclaimer: It's not feature complete today! But it is also not hitting any default code paths either. As development on this goes on, we can eventually remove the feature flag and run the new job format on all instances. This PR handles fallback of rendering old records correctly in the UI already.
2022-11-09 23:20:43 +00:00
|
|
|
# This tool rebuilds the batcheshelper image every time the source of it is changed.
|
|
|
|
|
batcheshelper-builder:
|
|
|
|
|
# Nothing to run for this, we just want to re-run the install script every time.
|
|
|
|
|
cmd: exit 0
|
|
|
|
|
install: ./enterprise/cmd/batcheshelper/build.sh
|
|
|
|
|
env:
|
|
|
|
|
IMAGE: sourcegraph/batcheshelper:insiders
|
|
|
|
|
# TODO: This is required but should only be set on M1 Macs.
|
|
|
|
|
PLATFORM: linux/arm64
|
|
|
|
|
watch:
|
|
|
|
|
- enterprise/cmd/batcheshelper
|
|
|
|
|
- lib/batches
|
|
|
|
|
continueWatchOnExit: true
|
|
|
|
|
|
2021-07-23 07:59:43 +00:00
|
|
|
# If you want to use this, either start it with `sg run batches-executor-firecracker` or
|
2021-07-22 15:09:36 +00:00
|
|
|
# modify the `commandsets.batches` in your local `sg.config.overwrite.yaml`
|
|
|
|
|
batches-executor-firecracker:
|
|
|
|
|
<<: *executor_template
|
|
|
|
|
cmd: |
|
|
|
|
|
env TMPDIR="$HOME/.sourcegraph/batches-executor-temp" \
|
2022-10-05 13:32:30 +00:00
|
|
|
sudo --preserve-env=TMPDIR,EXECUTOR_QUEUE_NAME,EXECUTOR_FRONTEND_URL,EXECUTOR_FRONTEND_PASSWORD,EXECUTOR_USE_FIRECRACKER \
|
2021-07-22 15:09:36 +00:00
|
|
|
.bin/executor
|
|
|
|
|
env:
|
|
|
|
|
EXECUTOR_USE_FIRECRACKER: true
|
2021-09-22 10:03:57 +00:00
|
|
|
EXECUTOR_QUEUE_NAME: batches
|
2021-07-22 15:09:36 +00:00
|
|
|
|
2022-11-30 23:04:48 +00:00
|
|
|
blobstore:
|
2023-04-03 22:15:13 +00:00
|
|
|
cmd: .bin/blobstore
|
2021-05-21 16:01:11 +00:00
|
|
|
install: |
|
2023-04-03 22:15:13 +00:00
|
|
|
# Ensure the old blobstore Docker container is not running
|
|
|
|
|
docker rm -f blobstore
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
|
|
|
|
go build -gcflags="$GCFLAGS" -o .bin/blobstore github.com/sourcegraph/sourcegraph/cmd/blobstore
|
|
|
|
|
checkBinary: .bin/blobstore
|
|
|
|
|
watch:
|
|
|
|
|
- lib
|
|
|
|
|
- internal
|
|
|
|
|
- cmd/blobstore
|
2021-05-21 16:01:11 +00:00
|
|
|
env:
|
2023-04-03 22:15:13 +00:00
|
|
|
BLOBSTORE_DATA_DIR: $HOME/.sourcegraph-dev/data/blobstore-go
|
2021-05-21 16:01:11 +00:00
|
|
|
|
2021-07-19 16:15:00 +00:00
|
|
|
redis-postgres:
|
|
|
|
|
# Add the following overwrites to your sg.config.overwrite.yaml to use the docker-compose
|
|
|
|
|
# database:
|
|
|
|
|
#
|
|
|
|
|
# env:
|
2022-06-02 16:06:54 +00:00
|
|
|
# PGHOST: localhost
|
2021-07-19 16:15:00 +00:00
|
|
|
# PGPASSWORD: sourcegraph
|
|
|
|
|
# PGUSER: sourcegraph
|
|
|
|
|
#
|
|
|
|
|
# You could also add an overwrite to add `redis-postgres` to the relevant command set(s).
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Dockerized version of redis and postgres
|
2021-09-29 11:57:55 +00:00
|
|
|
cmd: docker-compose -f dev/redis-postgres.yml up $COMPOSE_ARGS
|
|
|
|
|
env:
|
|
|
|
|
COMPOSE_ARGS: --force-recreate
|
2021-07-19 16:15:00 +00:00
|
|
|
|
2021-08-02 08:25:39 +00:00
|
|
|
jaeger:
|
|
|
|
|
cmd: |
|
2022-07-20 04:34:16 +00:00
|
|
|
echo "Jaeger will be available on http://localhost:16686/-/debug/jaeger/search"
|
2022-04-27 07:26:00 +00:00
|
|
|
.bin/jaeger-all-in-one-${JAEGER_VERSION} --log-level ${JAEGER_LOG_LEVEL}
|
2022-06-07 15:46:15 +00:00
|
|
|
install_func: installJaeger
|
2021-08-02 08:25:39 +00:00
|
|
|
env:
|
2022-07-20 04:34:16 +00:00
|
|
|
JAEGER_VERSION: 1.36.0
|
2021-08-02 08:25:39 +00:00
|
|
|
JAEGER_DISK: $HOME/.sourcegraph-dev/data/jaeger
|
2021-11-03 10:15:18 +00:00
|
|
|
JAEGER_LOG_LEVEL: error
|
2021-08-02 08:25:39 +00:00
|
|
|
QUERY_BASE_PATH: /-/debug/jaeger
|
|
|
|
|
|
|
|
|
|
grafana:
|
|
|
|
|
cmd: |
|
2022-08-19 11:55:58 +00:00
|
|
|
if [[ $(uname) == "Linux" ]]; then
|
2021-08-02 11:28:54 +00:00
|
|
|
# Linux needs an extra arg to support host.docker.internal, which is how grafana connects
|
|
|
|
|
# to the prometheus backend.
|
|
|
|
|
ADD_HOST_FLAG="--add-host=host.docker.internal:host-gateway"
|
|
|
|
|
|
|
|
|
|
# Docker users on Linux will generally be using direct user mapping, which
|
|
|
|
|
# means that they'll want the data in the volume mount to be owned by the
|
|
|
|
|
# same user as is running this script. Fortunately, the Grafana container
|
|
|
|
|
# doesn't really care what user it runs as, so long as it can write to
|
|
|
|
|
# /var/lib/grafana.
|
|
|
|
|
DOCKER_USER="--user=$UID"
|
|
|
|
|
fi
|
|
|
|
|
|
2021-08-02 08:25:39 +00:00
|
|
|
echo "Grafana: serving on http://localhost:${PORT}"
|
|
|
|
|
echo "Grafana: note that logs are piped to ${GRAFANA_LOG_FILE}"
|
|
|
|
|
docker run --rm ${DOCKER_USER} \
|
|
|
|
|
--name=${CONTAINER} \
|
|
|
|
|
--cpus=1 \
|
|
|
|
|
--memory=1g \
|
|
|
|
|
-p 0.0.0.0:3370:3370 ${ADD_HOST_FLAG} \
|
|
|
|
|
-v "${GRAFANA_DISK}":/var/lib/grafana \
|
|
|
|
|
-v "$(pwd)"/dev/grafana/all:/sg_config_grafana/provisioning/datasources \
|
2021-09-14 18:36:04 +00:00
|
|
|
sourcegraph/grafana:dev >"${GRAFANA_LOG_FILE}" 2>&1
|
2021-08-02 08:25:39 +00:00
|
|
|
install: |
|
2021-08-02 11:28:54 +00:00
|
|
|
mkdir -p "${GRAFANA_DISK}"
|
2021-08-02 08:25:39 +00:00
|
|
|
mkdir -p "$(dirname ${GRAFANA_LOG_FILE})"
|
2022-08-04 21:21:45 +00:00
|
|
|
export CACHE=true
|
2021-08-02 08:25:39 +00:00
|
|
|
docker inspect $CONTAINER >/dev/null 2>&1 && docker rm -f $CONTAINER
|
2021-09-14 18:36:04 +00:00
|
|
|
./docker-images/grafana/build.sh
|
2021-08-02 08:25:39 +00:00
|
|
|
env:
|
|
|
|
|
GRAFANA_DISK: $HOME/.sourcegraph-dev/data/grafana
|
|
|
|
|
# Log file location: since we log outside of the Docker container, we should
|
|
|
|
|
# log somewhere that's _not_ ~/.sourcegraph-dev/data/grafana, since that gets
|
|
|
|
|
# volume mounted into the container and therefore has its own ownership
|
|
|
|
|
# semantics.
|
|
|
|
|
# Now for the actual logging. Grafana's output gets sent to stdout and stderr.
|
|
|
|
|
# We want to capture that output, but because it's fairly noisy, don't want to
|
|
|
|
|
# display it in the normal case.
|
2021-08-02 11:28:54 +00:00
|
|
|
GRAFANA_LOG_FILE: $HOME/.sourcegraph-dev/logs/grafana/grafana.log
|
2021-11-08 17:09:12 +00:00
|
|
|
IMAGE: sourcegraph/grafana:dev
|
2021-08-02 08:25:39 +00:00
|
|
|
CONTAINER: grafana
|
|
|
|
|
PORT: 3370
|
|
|
|
|
# docker containers must access things via docker host on non-linux platforms
|
2022-03-03 04:13:28 +00:00
|
|
|
DOCKER_USER: ''
|
|
|
|
|
ADD_HOST_FLAG: ''
|
2021-09-14 18:36:04 +00:00
|
|
|
CACHE: false
|
2021-08-02 08:25:39 +00:00
|
|
|
|
|
|
|
|
prometheus:
|
2021-08-02 11:28:54 +00:00
|
|
|
cmd: |
|
2022-08-19 11:55:58 +00:00
|
|
|
if [[ $(uname) == "Linux" ]]; then
|
2021-08-02 11:28:54 +00:00
|
|
|
DOCKER_USER="--user=$UID"
|
|
|
|
|
|
|
|
|
|
# Frontend generally runs outside of Docker, so to access it we need to be
|
|
|
|
|
# able to access ports on the host. --net=host is a very dirty way of
|
|
|
|
|
# enabling this.
|
|
|
|
|
DOCKER_NET="--net=host"
|
|
|
|
|
SRC_FRONTEND_INTERNAL="localhost:3090"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
echo "Prometheus: serving on http://localhost:${PORT}"
|
|
|
|
|
echo "Prometheus: note that logs are piped to ${PROMETHEUS_LOG_FILE}"
|
|
|
|
|
docker run --rm ${DOCKER_NET} ${DOCKER_USER} \
|
|
|
|
|
--name=${CONTAINER} \
|
|
|
|
|
--cpus=1 \
|
|
|
|
|
--memory=4g \
|
|
|
|
|
-p 0.0.0.0:9090:9090 \
|
|
|
|
|
-v "${PROMETHEUS_DISK}":/prometheus \
|
|
|
|
|
-v "$(pwd)/${CONFIG_DIR}":/sg_prometheus_add_ons \
|
|
|
|
|
-e SRC_FRONTEND_INTERNAL="${SRC_FRONTEND_INTERNAL}" \
|
|
|
|
|
-e DISABLE_SOURCEGRAPH_CONFIG="${DISABLE_SOURCEGRAPH_CONFIG:-""}" \
|
|
|
|
|
-e DISABLE_ALERTMANAGER="${DISABLE_ALERTMANAGER:-""}" \
|
2023-01-13 01:44:57 +00:00
|
|
|
-e PROMETHEUS_ADDITIONAL_FLAGS="--web.enable-lifecycle --web.enable-admin-api" \
|
2021-08-02 11:28:54 +00:00
|
|
|
${IMAGE} >"${PROMETHEUS_LOG_FILE}" 2>&1
|
|
|
|
|
install: |
|
|
|
|
|
mkdir -p "${PROMETHEUS_DISK}"
|
|
|
|
|
mkdir -p "$(dirname ${PROMETHEUS_LOG_FILE})"
|
|
|
|
|
|
|
|
|
|
docker inspect $CONTAINER >/dev/null 2>&1 && docker rm -f $CONTAINER
|
|
|
|
|
|
2022-08-19 11:55:58 +00:00
|
|
|
if [[ $(uname) == "Linux" ]]; then
|
2021-08-02 11:28:54 +00:00
|
|
|
PROM_TARGETS="dev/prometheus/linux/prometheus_targets.yml"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
cp ${PROM_TARGETS} "${CONFIG_DIR}"/prometheus_targets.yml
|
|
|
|
|
CACHE=true ./docker-images/prometheus/build.sh
|
|
|
|
|
env:
|
|
|
|
|
PROMETHEUS_DISK: $HOME/.sourcegraph-dev/data/prometheus
|
|
|
|
|
# See comment above for `grafana`
|
|
|
|
|
PROMETHEUS_LOG_FILE: $HOME/.sourcegraph-dev/logs/prometheus/prometheus.log
|
|
|
|
|
IMAGE: sourcegraph/prometheus:dev
|
|
|
|
|
CONTAINER: prometheus
|
|
|
|
|
PORT: 9090
|
|
|
|
|
CONFIG_DIR: docker-images/prometheus/config
|
2022-03-03 04:13:28 +00:00
|
|
|
DOCKER_USER: ''
|
|
|
|
|
DOCKER_NET: ''
|
2021-08-02 11:28:54 +00:00
|
|
|
PROM_TARGETS: dev/prometheus/all/prometheus_targets.yml
|
|
|
|
|
SRC_FRONTEND_INTERNAL: host.docker.internal:3090
|
2022-03-03 04:13:28 +00:00
|
|
|
ADD_HOST_FLAG: ''
|
2021-08-02 11:28:54 +00:00
|
|
|
DISABLE_SOURCEGRAPH_CONFIG: false
|
2021-08-02 08:25:39 +00:00
|
|
|
|
|
|
|
|
postgres_exporter:
|
|
|
|
|
cmd: ./dev/postgres_exporter.sh
|
|
|
|
|
|
2021-08-13 17:50:27 +00:00
|
|
|
monitoring-generator:
|
2022-12-19 17:49:25 +00:00
|
|
|
cmd: (cd monitoring/ && go generate ./... )
|
2021-08-13 17:50:27 +00:00
|
|
|
env:
|
|
|
|
|
RELOAD: true
|
|
|
|
|
watch:
|
|
|
|
|
- monitoring
|
|
|
|
|
continueWatchOnExit: true
|
|
|
|
|
|
2021-10-12 15:14:50 +00:00
|
|
|
loki:
|
|
|
|
|
cmd: |
|
2021-11-05 23:16:36 +00:00
|
|
|
echo "Loki: serving on http://localhost:3100"
|
2021-10-12 15:14:50 +00:00
|
|
|
echo "Loki: note that logs are piped to ${LOKI_LOG_FILE}"
|
|
|
|
|
docker run --rm --name=loki \
|
|
|
|
|
-p 3100:3100 -v $LOKI_DISK:/loki \
|
|
|
|
|
index.docker.io/grafana/loki:$LOKI_VERSION >"${LOKI_LOG_FILE}" 2>&1
|
|
|
|
|
install: |
|
|
|
|
|
mkdir -p "${LOKI_DISK}"
|
|
|
|
|
mkdir -p "$(dirname ${LOKI_LOG_FILE})"
|
|
|
|
|
docker pull index.docker.io/grafana/loki:$LOKI_VERSION
|
|
|
|
|
env:
|
|
|
|
|
LOKI_DISK: $HOME/.sourcegraph-dev/data/loki
|
2022-03-03 04:13:28 +00:00
|
|
|
LOKI_VERSION: '2.3.0'
|
2021-10-12 15:14:50 +00:00
|
|
|
LOKI_LOG_FILE: $HOME/.sourcegraph-dev/logs/loki/loki.log
|
|
|
|
|
|
2022-07-05 18:28:15 +00:00
|
|
|
otel-collector:
|
2022-07-20 19:06:23 +00:00
|
|
|
install: docker-images/opentelemetry-collector/build.sh
|
2022-07-26 14:51:04 +00:00
|
|
|
description: OpenTelemetry collector
|
2022-07-05 18:28:15 +00:00
|
|
|
cmd: |
|
|
|
|
|
JAEGER_HOST='host.docker.internal'
|
2022-08-19 11:55:58 +00:00
|
|
|
if [[ $(uname) == "Linux" ]]; then
|
2022-07-05 18:28:15 +00:00
|
|
|
# Jaeger generally runs outside of Docker, so to access it we need to be
|
|
|
|
|
# able to access ports on the host, because the Docker host only exists on
|
|
|
|
|
# MacOS. --net=host is a very dirty way of enabling this.
|
|
|
|
|
DOCKER_NET="--net=host"
|
|
|
|
|
JAEGER_HOST="localhost"
|
|
|
|
|
fi
|
|
|
|
|
|
2022-07-20 19:06:23 +00:00
|
|
|
docker container rm otel-collector
|
|
|
|
|
docker run --rm --name=otel-collector $DOCKER_NET $DOCKER_ARGS \
|
2022-10-18 10:23:21 +00:00
|
|
|
-p 4317:4317 -p 4318:4318 -p 55679:55679 -p 55670:55670 \
|
2022-12-19 12:18:51 +00:00
|
|
|
-p 8888:8888 \
|
2022-07-05 18:28:15 +00:00
|
|
|
-e JAEGER_HOST=$JAEGER_HOST \
|
2022-07-20 19:06:23 +00:00
|
|
|
-e HONEYCOMB_API_KEY=$HONEYCOMB_API_KEY \
|
|
|
|
|
-e HONEYCOMB_DATASET=$HONEYCOMB_DATASET \
|
|
|
|
|
$IMAGE --config "/etc/otel-collector/$CONFIGURATION_FILE"
|
2022-07-05 18:28:15 +00:00
|
|
|
env:
|
2022-07-20 19:06:23 +00:00
|
|
|
IMAGE: sourcegraph/opentelemetry-collector:dev
|
|
|
|
|
# Overwrite the following in sg.config.overwrite.yaml, based on which collector
|
|
|
|
|
# config you are using - see docker-images/opentelemetry-collector for more details.
|
|
|
|
|
CONFIGURATION_FILE: 'configs/jaeger.yaml'
|
|
|
|
|
# HONEYCOMB_API_KEY: ''
|
|
|
|
|
# HONEYCOMB_DATASET: ''
|
2022-07-05 18:28:15 +00:00
|
|
|
|
2021-10-19 13:11:21 +00:00
|
|
|
storybook:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm storybook
|
|
|
|
|
install: pnpm install
|
2021-10-19 13:11:21 +00:00
|
|
|
|
2021-09-30 07:23:24 +00:00
|
|
|
# This will execute `env`, a utility to print the process environment. Can
|
|
|
|
|
# be used to debug which global vars `sg` uses.
|
|
|
|
|
debug-env:
|
2022-07-26 14:51:04 +00:00
|
|
|
description: Debug env vars
|
2021-09-30 07:23:24 +00:00
|
|
|
cmd: env
|
|
|
|
|
|
2021-11-10 09:26:15 +00:00
|
|
|
bext:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/browser dev
|
|
|
|
|
install: pnpm install
|
2021-11-10 09:26:15 +00:00
|
|
|
|
2023-01-20 00:35:39 +00:00
|
|
|
sourcegraph:
|
|
|
|
|
description: Single program (Go static binary) distribution
|
|
|
|
|
cmd: |
|
2023-02-03 12:59:05 +00:00
|
|
|
unset SRC_GIT_SERVERS INDEXED_SEARCH_SERVERS REDIS_ENDPOINT
|
2023-01-20 00:35:39 +00:00
|
|
|
|
|
|
|
|
# TODO: This should be fixed
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
# If EXTSVC_CONFIG_FILE is *unset*, set a default.
|
|
|
|
|
export EXTSVC_CONFIG_FILE=${EXTSVC_CONFIG_FILE-'../dev-private/enterprise/dev/external-services-config.json'}
|
|
|
|
|
|
|
|
|
|
.bin/sourcegraph
|
|
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2023-03-10 12:36:20 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -ldflags="-X github.com/sourcegraph/sourcegraph/internal/conf/deploy.forceType=app" -o .bin/sourcegraph github.com/sourcegraph/sourcegraph/enterprise/cmd/sourcegraph
|
2023-01-20 00:35:39 +00:00
|
|
|
checkBinary: .bin/sourcegraph
|
|
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
SITE_CONFIG_FILE: '../dev-private/enterprise/dev/site-config.json'
|
|
|
|
|
SITE_CONFIG_ESCAPE_HATCH_PATH: '$HOME/.sourcegraph/site-config.json'
|
|
|
|
|
WEBPACK_DEV_SERVER: 1
|
|
|
|
|
watch:
|
|
|
|
|
- cmd
|
|
|
|
|
- enterprise
|
|
|
|
|
- internal
|
|
|
|
|
- lib
|
|
|
|
|
- schema
|
|
|
|
|
|
|
|
|
|
sourcegraph-oss:
|
|
|
|
|
description: Single program (Go static binary) distribution, OSS variant
|
|
|
|
|
cmd: |
|
|
|
|
|
unset SRC_GIT_SERVERS INDEXED_SEARCH_SERVERS
|
|
|
|
|
.bin/sourcegraph-oss
|
|
|
|
|
install: |
|
|
|
|
|
if [ -n "$DELVE" ]; then
|
|
|
|
|
export GCFLAGS='all=-N -l'
|
|
|
|
|
fi
|
2023-03-10 12:36:20 +00:00
|
|
|
go build -gcflags="$GCFLAGS" -ldflags="-X github.com/sourcegraph/sourcegraph/internal/conf/deploy.forceType=app" -o .bin/sourcegraph-oss github.com/sourcegraph/sourcegraph/cmd/sourcegraph-oss
|
2023-01-20 00:35:39 +00:00
|
|
|
checkBinary: .bin/sourcegraph-oss
|
|
|
|
|
env:
|
|
|
|
|
WEBPACK_DEV_SERVER: 1
|
|
|
|
|
watch:
|
|
|
|
|
- cmd
|
|
|
|
|
- internal
|
|
|
|
|
- schema
|
|
|
|
|
|
2023-03-02 10:31:51 +00:00
|
|
|
bazelCommands:
|
|
|
|
|
oss-frontend:
|
|
|
|
|
target: //cmd/frontend
|
|
|
|
|
env:
|
|
|
|
|
CONFIGURATION_MODE: server
|
|
|
|
|
USE_ENHANCED_LANGUAGE_DETECTION: false
|
|
|
|
|
# frontend processes need this to be set so that the paths to the assets are rendered correctly
|
|
|
|
|
WEBPACK_DEV_SERVER: 1
|
|
|
|
|
oss-worker:
|
|
|
|
|
target: //cmd/worker
|
|
|
|
|
oss-repo-updater:
|
|
|
|
|
target: //cmd/repo-updater
|
|
|
|
|
oss-symbols:
|
|
|
|
|
target: //cmd/symbols
|
|
|
|
|
env:
|
|
|
|
|
CTAGS_COMMAND: dev/universal-ctags-dev
|
|
|
|
|
CTAGS_PROCESSES: 2
|
|
|
|
|
oss-gitserver-0:
|
|
|
|
|
target: //cmd/gitserver
|
|
|
|
|
env:
|
|
|
|
|
<<: *gitserverenv
|
|
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3501
|
|
|
|
|
GITSERVER_ADDR: 127.0.0.1:3501
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_1
|
|
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3551
|
|
|
|
|
# Second OSS gitserver shard for the bazel-based command sets.
# It runs side by side with oss-gitserver-0 (127.0.0.1:3501, repos_1, profiler
# on 127.0.0.1:3551), so it must use its own listen address, repos directory,
# and profiler port. The values below mirror the enterprise `gitserver-1`
# entry and the second address listed in SRC_GIT_SERVERS.
oss-gitserver-1:
  target: //cmd/gitserver
  env:
    # Shared gitserver defaults (HOSTNAME); the keys below are per-shard.
    <<: *gitserverenv
    GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3502
    GITSERVER_ADDR: 127.0.0.1:3502
    SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2
    SRC_PROF_HTTP: 127.0.0.1:3552
|
|
|
|
|
searcher:
|
|
|
|
|
target: //cmd/searcher
|
|
|
|
|
github-proxy:
|
|
|
|
|
target: //cmd/github-proxy
|
|
|
|
|
frontend:
|
|
|
|
|
description: Enterprise frontend
|
|
|
|
|
target: //enterprise/cmd/frontend
|
|
|
|
|
precmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
# If EXTSVC_CONFIG_FILE is *unset*, set a default.
|
|
|
|
|
export EXTSVC_CONFIG_FILE=${EXTSVC_CONFIG_FILE-'../dev-private/enterprise/dev/external-services-config.json'}
|
|
|
|
|
env:
|
|
|
|
|
CONFIGURATION_MODE: server
|
|
|
|
|
USE_ENHANCED_LANGUAGE_DETECTION: false
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
SITE_CONFIG_FILE: '../dev-private/enterprise/dev/site-config.json'
|
|
|
|
|
SITE_CONFIG_ESCAPE_HATCH_PATH: '$HOME/.sourcegraph/site-config.json'
|
|
|
|
|
# frontend processes need this to be set so that the paths to the assets are rendered correctly
|
|
|
|
|
WEBPACK_DEV_SERVER: 1
|
|
|
|
|
worker:
|
|
|
|
|
target: //enterprise/cmd/worker
|
|
|
|
|
precmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
repo-updater:
|
|
|
|
|
target: //enterprise/cmd/repo-updater
|
|
|
|
|
precmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
symbols:
|
|
|
|
|
# TODO build ctags thing
|
|
|
|
|
target: //enterprise/cmd/symbols
|
|
|
|
|
checkBinary: .bin/symbols
|
|
|
|
|
env:
|
|
|
|
|
CTAGS_COMMAND: dev/universal-ctags-dev
|
|
|
|
|
CTAGS_PROCESSES: 2
|
|
|
|
|
USE_ROCKSKIP: 'false'
|
|
|
|
|
gitserver-template: &gitserver_bazel_template
|
|
|
|
|
target: //enterprise/cmd/gitserver
|
|
|
|
|
env: &gitserverenv
|
|
|
|
|
HOSTNAME: 127.0.0.1:3178
|
|
|
|
|
# This is only here to stay backwards-compatible with people's custom
|
|
|
|
|
# `sg.config.overwrite.yaml` files
|
|
|
|
|
gitserver:
|
|
|
|
|
<<: *gitserver_bazel_template
|
|
|
|
|
gitserver-0:
|
|
|
|
|
<<: *gitserver_bazel_template
|
|
|
|
|
env:
|
|
|
|
|
<<: *gitserverenv
|
|
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3501
|
|
|
|
|
GITSERVER_ADDR: 127.0.0.1:3501
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_1
|
|
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3551
|
|
|
|
|
gitserver-1:
|
|
|
|
|
<<: *gitserver_bazel_template
|
|
|
|
|
env:
|
|
|
|
|
<<: *gitserverenv
|
|
|
|
|
GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3502
|
|
|
|
|
GITSERVER_ADDR: 127.0.0.1:3502
|
|
|
|
|
SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2
|
|
|
|
|
SRC_PROF_HTTP: 127.0.0.1:3552
|
|
|
|
|
codeintel-worker:
|
|
|
|
|
precmd: |
|
|
|
|
|
export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem)
|
|
|
|
|
target: //enterprise/cmd/precise-code-intel-worker
|
|
|
|
|
executor-template: &executor_template_bazel
|
|
|
|
|
target: //enterprise/cmd/executor
|
|
|
|
|
env:
|
|
|
|
|
EXECUTOR_QUEUE_NAME: TEMPLATE
|
|
|
|
|
TMPDIR: $HOME/.sourcegraph/executor-temp
|
2023-03-28 21:18:56 +00:00
|
|
|
# Required for frontend and executor to communicate
|
|
|
|
|
EXECUTOR_FRONTEND_URL: http://localhost:3080
|
|
|
|
|
# Must match the secret defined in the site config.
|
|
|
|
|
EXECUTOR_FRONTEND_PASSWORD: hunter2hunter2hunter2
|
|
|
|
|
# Disable firecracker inside executor in dev
|
|
|
|
|
EXECUTOR_USE_FIRECRACKER: false
|
2023-03-02 10:31:51 +00:00
|
|
|
codeintel-executor:
|
|
|
|
|
<<: *executor_template_bazel
|
|
|
|
|
env:
|
|
|
|
|
EXECUTOR_QUEUE_NAME: codeintel
|
|
|
|
|
TMPDIR: $HOME/.sourcegraph/indexer-temp
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# CommandSets ################################################################
|
|
|
|
|
#
|
2021-10-01 13:36:24 +00:00
|
|
|
defaultCommandset: enterprise
|
2021-03-29 08:14:53 +00:00
|
|
|
commandsets:
|
2023-03-02 10:31:51 +00:00
|
|
|
oss-bazel:
|
|
|
|
|
# open-source version doesn't require the dev-private repository
|
|
|
|
|
requiresDevPrivate: false
|
|
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
|
|
|
|
- git
|
2023-03-16 09:54:01 +00:00
|
|
|
- bazelisk
|
2023-03-03 13:58:50 +00:00
|
|
|
- ibazel
|
2023-03-02 10:31:51 +00:00
|
|
|
bazelCommands:
|
|
|
|
|
- oss-frontend
|
|
|
|
|
- oss-worker
|
|
|
|
|
- oss-repo-updater
|
|
|
|
|
- oss-symbols
|
|
|
|
|
- oss-gitserver-0
|
|
|
|
|
- oss-gitserver-1
|
|
|
|
|
- github-proxy
|
|
|
|
|
- searcher
|
|
|
|
|
commands:
|
|
|
|
|
- oss-web
|
|
|
|
|
- caddy
|
|
|
|
|
- syntax-highlighter
|
|
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
|
|
|
|
- docsite
|
2021-07-13 16:13:18 +00:00
|
|
|
oss:
|
2021-09-29 14:22:16 +00:00
|
|
|
# open-source version doesn't require the dev-private repository
|
|
|
|
|
requiresDevPrivate: false
|
2021-07-21 15:37:24 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-21 15:37:24 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- oss-frontend
|
|
|
|
|
- oss-worker
|
|
|
|
|
- oss-repo-updater
|
|
|
|
|
- oss-symbols
|
2022-12-28 08:44:07 +00:00
|
|
|
- oss-gitserver-0
|
|
|
|
|
- oss-gitserver-1
|
2021-07-21 15:37:24 +00:00
|
|
|
- searcher
|
2022-10-26 15:01:44 +00:00
|
|
|
- oss-web
|
2021-07-21 15:37:24 +00:00
|
|
|
- caddy
|
|
|
|
|
- docsite
|
2021-09-28 04:18:12 +00:00
|
|
|
- syntax-highlighter
|
2021-07-21 15:37:24 +00:00
|
|
|
- github-proxy
|
2022-06-20 17:57:54 +00:00
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2023-03-02 10:31:51 +00:00
|
|
|
enterprise-bazel: &enterprise_bazel_set
|
|
|
|
|
requiresDevPrivate: true
|
|
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
|
|
|
|
- git
|
2023-03-16 09:54:01 +00:00
|
|
|
- bazelisk
|
2023-03-03 13:58:50 +00:00
|
|
|
- ibazel
|
2023-03-02 10:31:51 +00:00
|
|
|
bazelCommands:
|
|
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
|
|
|
|
- searcher
|
|
|
|
|
- symbols
|
|
|
|
|
- github-proxy
|
|
|
|
|
commands:
|
|
|
|
|
- web
|
|
|
|
|
- blobstore
|
|
|
|
|
- docsite
|
|
|
|
|
- syntax-highlighter
|
|
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
|
|
|
|
- caddy
|
2021-07-13 16:13:18 +00:00
|
|
|
enterprise: &enterprise_set
|
2021-09-29 14:22:16 +00:00
|
|
|
requiresDevPrivate: true
|
2021-07-21 15:37:24 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-21 15:37:24 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- web
|
2022-10-10 13:26:58 +00:00
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
2021-07-21 15:37:24 +00:00
|
|
|
- searcher
|
2022-06-20 17:57:54 +00:00
|
|
|
- symbols
|
2021-07-21 15:37:24 +00:00
|
|
|
- caddy
|
|
|
|
|
- docsite
|
2021-09-28 04:18:12 +00:00
|
|
|
- syntax-highlighter
|
2021-07-21 15:37:24 +00:00
|
|
|
- github-proxy
|
2022-06-20 17:57:54 +00:00
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
2023-01-11 13:51:36 +00:00
|
|
|
- blobstore
|
2021-03-29 08:14:53 +00:00
|
|
|
|
2022-05-04 07:12:30 +00:00
|
|
|
enterprise-e2e:
|
|
|
|
|
<<: *enterprise_set
|
|
|
|
|
env:
|
2022-05-05 16:17:18 +00:00
|
|
|
# EXTSVC_CONFIG_FILE being set prevents the e2e test suite from adding
|
2022-05-04 07:12:30 +00:00
|
|
|
# additional connections.
|
2022-07-14 13:05:49 +00:00
|
|
|
EXTSVC_CONFIG_FILE: ''
|
2022-05-04 07:12:30 +00:00
|
|
|
|
2021-09-08 08:41:06 +00:00
|
|
|
dotcom:
|
|
|
|
|
<<: *enterprise_set
|
|
|
|
|
env:
|
|
|
|
|
SOURCEGRAPHDOTCOM_MODE: true
|
|
|
|
|
|
2023-03-02 10:31:51 +00:00
|
|
|
codeintel-bazel: &codeintel_bazel_set
|
|
|
|
|
requiresDevPrivate: true
|
|
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
|
|
|
|
- git
|
2023-03-16 09:54:01 +00:00
|
|
|
- bazelisk
|
2023-03-03 13:58:50 +00:00
|
|
|
- ibazel
|
2023-03-02 10:31:51 +00:00
|
|
|
bazelCommands:
|
|
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
|
|
|
|
- searcher
|
|
|
|
|
- symbols
|
|
|
|
|
- github-proxy
|
|
|
|
|
- codeintel-worker
|
|
|
|
|
- codeintel-executor
|
|
|
|
|
commands:
|
|
|
|
|
- web
|
|
|
|
|
- blobstore
|
|
|
|
|
- docsite
|
|
|
|
|
- syntax-highlighter
|
|
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
|
|
|
|
- caddy
|
|
|
|
|
- jaeger
|
|
|
|
|
- grafana
|
|
|
|
|
- prometheus
|
|
|
|
|
|
2022-03-11 15:06:49 +00:00
|
|
|
codeintel: &codeintel_set
|
2021-09-29 14:22:16 +00:00
|
|
|
requiresDevPrivate: true
|
2021-07-21 15:37:24 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-21 15:37:24 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- web
|
2022-10-10 13:26:58 +00:00
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
2021-07-21 15:37:24 +00:00
|
|
|
- searcher
|
2022-06-20 17:57:54 +00:00
|
|
|
- symbols
|
2021-07-21 15:37:24 +00:00
|
|
|
- caddy
|
|
|
|
|
- docsite
|
2021-09-28 04:18:12 +00:00
|
|
|
- syntax-highlighter
|
2021-07-21 15:37:24 +00:00
|
|
|
- github-proxy
|
2022-06-20 17:57:54 +00:00
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
2022-11-30 23:04:48 +00:00
|
|
|
- blobstore
|
2022-06-20 17:57:54 +00:00
|
|
|
- codeintel-worker
|
2021-07-21 15:37:24 +00:00
|
|
|
- codeintel-executor
|
2022-12-05 18:10:39 +00:00
|
|
|
# - otel-collector
|
2021-11-30 17:56:45 +00:00
|
|
|
- jaeger
|
|
|
|
|
- grafana
|
|
|
|
|
- prometheus
|
2021-05-21 16:01:11 +00:00
|
|
|
|
2022-03-11 15:06:49 +00:00
|
|
|
enterprise-codeintel:
|
|
|
|
|
<<: *codeintel_set
|
2023-03-02 10:31:51 +00:00
|
|
|
enterprise-codeintel-bazel:
|
|
|
|
|
<<: *codeintel_bazel_set
|
2022-03-11 15:06:49 +00:00
|
|
|
|
2021-05-21 16:01:11 +00:00
|
|
|
enterprise-codeinsights:
|
2021-09-29 14:22:16 +00:00
|
|
|
requiresDevPrivate: true
|
2021-07-21 15:37:24 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-21 15:37:24 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- web
|
2022-10-10 13:26:58 +00:00
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
2021-07-21 15:37:24 +00:00
|
|
|
- searcher
|
|
|
|
|
- symbols
|
|
|
|
|
- caddy
|
|
|
|
|
- docsite
|
2021-09-28 04:18:12 +00:00
|
|
|
- syntax-highlighter
|
2021-07-21 15:37:24 +00:00
|
|
|
- github-proxy
|
2022-06-20 17:57:54 +00:00
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
2023-01-11 13:51:36 +00:00
|
|
|
- blobstore
|
2021-09-08 08:41:06 +00:00
|
|
|
env:
|
|
|
|
|
DISABLE_CODE_INSIGHTS_HISTORICAL: false
|
|
|
|
|
DISABLE_CODE_INSIGHTS: false
|
2021-05-21 16:01:11 +00:00
|
|
|
|
2021-05-25 08:33:48 +00:00
|
|
|
api-only:
|
2021-09-29 14:22:16 +00:00
|
|
|
requiresDevPrivate: true
|
2021-07-21 15:37:24 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-21 15:37:24 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
2022-10-10 13:26:58 +00:00
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
2021-07-21 15:37:24 +00:00
|
|
|
- searcher
|
|
|
|
|
- symbols
|
|
|
|
|
- github-proxy
|
2022-06-20 17:57:54 +00:00
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
2023-01-11 13:51:36 +00:00
|
|
|
- blobstore
|
2021-05-25 08:33:48 +00:00
|
|
|
|
2021-07-05 11:06:52 +00:00
|
|
|
batches:
|
2021-09-29 14:22:16 +00:00
|
|
|
requiresDevPrivate: true
|
2021-07-21 15:37:24 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-21 15:37:24 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- web
|
2022-10-10 13:26:58 +00:00
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
2021-07-21 15:37:24 +00:00
|
|
|
- searcher
|
2022-06-20 17:57:54 +00:00
|
|
|
- symbols
|
2021-07-21 15:37:24 +00:00
|
|
|
- caddy
|
|
|
|
|
- docsite
|
2021-09-28 04:18:12 +00:00
|
|
|
- syntax-highlighter
|
2021-07-21 15:37:24 +00:00
|
|
|
- github-proxy
|
2022-06-20 17:57:54 +00:00
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
2023-01-11 13:51:36 +00:00
|
|
|
- blobstore
|
2021-07-21 15:37:24 +00:00
|
|
|
- batches-executor
|
Experiment: Natively run SSBC in docker (#44034)
This adds an experimental code path that I will use to test a docker-only execution mode for server-side batch changes. This code path is never executed for customers until we make the switch when we deem it ready. This will allow me to dogfood this while it's not available to customer instances yet.
Ultimately, the goal of this is to make executors simply be "the job runner platform through a generic interface". Today, this depends on src-cli to do a good bunch of the work. This is a blocker for going full docker-based with executors, which will ultimately be a requirement on the road to k8s-based executors.
As this removes the dependency on src-cli, nothing but the job interface and API endpoints tie executor and Sourcegraph instance together. Ultimately, this will allow us to support larger version spans between the two (pending executors going GA and being feature-complete).
Known issues/limitations:
Steps skipped in between steps that run don't work yet
Skipping steps dynamically is inefficient as we cannot tell the executor to skip a step IF X, so we replace the script by exit 0
It is unclear if all variants of file mounts still work. Basic cases do work. Files used to be read-only in src-cli, they aren't now, but content is still reset in between steps.
The assumption that everything operates in /work is broken here, because we need to use what executors give us to persist out-of-repo state in between containers (like the step result from the previous step)
It is unclear if workspace mounts work
Cache keys are not correctly computed if using workspace mounts - the metadataretriever is nil
We still use log outputs to transfer the AfterStepResults to the Sourcegraph instance, this should finally become an artifact instead. Then, we don't have to rely on the execution_log_entires anymore and can theoretically prune those after some time. This column is currently growing indefinitely.
It depends on tee being available in the docker images to capture the cmd.stdout/cmd.stderr properly for template variable rendering
Env-vars are not rendered in their evaluated form post-execution
File permissions are unclear and might be similarly broken to how they are now - or even worse
Disclaimer: It's not feature complete today! But it is also not hitting any default code paths either. As development on this goes on, we can eventually remove the feature flag and run the new job format on all instances. This PR handles fallback of rendering old records correctly in the UI already.
2022-11-09 23:20:43 +00:00
|
|
|
- batcheshelper-builder
|
2021-07-05 11:06:52 +00:00
|
|
|
|
embeddings: searcher and indexer (#48017)
# High-level architecture overview
<img width="2231" alt="Screenshot 2023-02-24 at 15 13 59"
src="https://user-images.githubusercontent.com/6417322/221200130-53c1ff25-4c47-4532-885f-5c4f9dadb05e.png">
# Embeddings
Really quickly: embeddings are a semantic representation of text.
Embeddings are usually floating-point vectors with 256+ elements. The
neat thing about embeddings is that they allow us to search over textual
information using a semantic correlation between the query and the text,
not just syntactic (matching keywords).
In this PR, we implemented an embedding service that will allow us to do
semantic code search over repositories in Sourcegraph. So, for example,
you'll be able to ask, "how do access tokens work in Sourcegraph", and
it will give you a list of the closest matching code files.
Additionally, we build a context detection service powered by
embeddings. In chat applications, it is important to know whether the
user's message requires additional context. We have to differentiate
between two cases: the user asks a general question about the codebase,
or the user references something in the existing conversation. In the
latter case, including the context would ruin the flow of the
conversation, and the chatbot would most likely return a confusing
answer. We determine whether a query _does not_ require additional
context using two approaches:
1. We check if the query contains well-known phrases that would indicate
the user is referencing the existing conversation (e.g., translate
previous, change that)
1. We have a static dataset of messages that require context and a
dataset of messages that do not. We embed both datasets, and then, using
embedding similarity, we can check which set is more similar to the
query.
## GraphQL API
We add four new resolvers to the GraphQL API:
```graphql
extend type Query {
embeddingsSearch(repo: ID!, query: String!, codeResultsCount: Int!, textResultsCount: Int!): EmbeddingsSearchResults!
isContextRequiredForQuery(query: String!): Boolean!
}
extend type Mutation {
scheduleRepositoriesForEmbedding(repoNames: [String!]!): EmptyResponse!
scheduleContextDetectionForEmbedding: EmptyResponse!
}
```
- `embeddingsSearch` performs embeddings search over the repo embeddings
and returns the specified number of results
- `isContextRequiredForQuery` determines whether the given query
requires additional context
- `scheduleRepositoriesForEmbedding` schedules a repo embedding
background job
- `scheduleContextDetectionForEmbedding` schedules a context detection
embedding background job that embeds a static dataset of messages.
## Repo embedding background job
Embedding a repository is implemented as a background job. The
background job handler receives the repository and the revision, which
should be embedded. Handler then gathers a list of files from the
gitserver and excludes files >1MB in size. The list of files is split
into code and text files (.md, .txt), and we build a separate embedding
index for both. We split them because in a combined index, the text
files always tended to feature as top results and didn't leave any room
for code files. Once we have the list of files, the procedure is as
follows:
- For each file
- Get file contents from gitserver
- Check if the file is embeddable (is not autogenerated, is large
enough, does not have long lines)
- Split the file into embeddable chunks
- Embed the file chunks using an external embedding service (defined in
site config)
- Add embedded file chunks and metadata to the index
- Metadata contains the file name, the start line, and the end line of
the chunk
- Once all files are processed, the index is marshaled into JSON and
stored in Cloud storage (GCS, S3)
### Site config changes
As mentioned, we use a configurable external embedding API that does the
actual text -> vector embedding part. Ideally, this allows us to swap
embedding providers in the future.
```json
"embeddings": {
"description": "Configuration for embeddings service.",
"type": "object",
"required": ["enabled", "dimensions", "model", "accessToken", "url"],
"properties": {
"enabled": {
"description": "Toggles whether embedding service is enabled.",
"type": "boolean",
"default": false
},
"dimensions": {
"description": "The dimensionality of the embedding vectors.",
"type": "integer",
"minimum": 0
},
"model": {
"description": "The model used for embedding.",
"type": "string"
},
"accessToken": {
"description": "The access token used to authenticate with the external embedding API service.",
"type": "string"
},
"url": {
"description": "The url to the external embedding API service.",
"type": "string",
"format": "uri"
}
}
}
```
## Repo embeddings search
The repo embeddings search is implemented in its own service. When a
user queries a repo using embeddings search, the following happens:
- Download the repo embedding index from blob storage and cache it in
memory
- We cache up to 5 embedding indexes in memory
- Embed the query and use the embedded query vector to find similar code
and text file metadata in the embedding index
- Query gitserver for the actual file contents
- Return the results
## Interesting files
- [Similarity
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-102cc83520004eb0e2795e49bc435c5142ca555189b1db3a52bbf1ffb82fa3c6)
- [Repo embedding job
handler](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-c345f373f426398beb4b9cd5852ba862a2718687882db2a8b2d9c7fbb5f1dc52)
- [External embedding api
client](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-ad1e7956f518e4bcaee17dd9e7ac04a5e090c00d970fcd273919e887e1d2cf8f)
- [Embedding a
repo](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-1f35118727128095b7816791b6f0a2e0e060cddee43d25102859b8159465585c)
- [Embeddings searcher
service](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-5b20f3e7ef87041daeeaef98b58ebf7388519cedcdfc359dc5e6d4e0b021472e)
- [Embeddings
search](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-79f95b9cc3f1ef39c1a0b88015bd9cd6c19c30a8d4c147409f1b8e8cd9462ea1)
- [Repo embedding index cache
management](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-8a41f7dec31054889dbf86e97c52223d5636b4d408c6b375bcfc09160a8b70f8)
- [GraphQL
resolvers](https://github.com/sourcegraph/sourcegraph/pull/48017/files#diff-9b30a0b5efcb63e2f4611b99ab137fbe09629a769a4f30d10a1b2da41a01d21f)
## Test plan
- Start by filling out the `embeddings` object in the site config (let
me know if you need an API key)
- Start the embeddings service using `sg start embeddings`
- Go to the `/api/console` page and schedule a repo embedding job and a
context detection embedding job:
```graphql
mutation {
scheduleRepositoriesForEmbedding(repoNames: ["github.com/sourcegraph/handbook"]) {
__typename
}
scheduleContextDetectionForEmbedding {
__typename
}
}
```
- Once both are finished, you should be able to query the repo embedding
index, and determine whether context is needed for a given query:
```graphql
query {
isContextRequiredForQuery(query: "how do access tokens work")
embeddingsSearch(
repo: "UmVwb3NpdG9yeToy", # github.com/sourcegraph/handbook GQL ID
query: "how do access tokens work",
codeResultsCount: 5,
textResultsCount: 5) {
codeResults {
fileName
content
}
textResults {
fileName
content
}
}
}
```
2023-03-01 09:50:12 +00:00
|
|
|
embeddings:
|
|
|
|
|
requiresDevPrivate: true
|
|
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
|
|
|
|
- git
|
|
|
|
|
commands:
|
|
|
|
|
- embeddings
|
|
|
|
|
- frontend
|
|
|
|
|
- worker
|
|
|
|
|
- repo-updater
|
|
|
|
|
- web
|
|
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
|
|
|
|
- searcher
|
|
|
|
|
- symbols
|
|
|
|
|
- caddy
|
|
|
|
|
- docsite
|
|
|
|
|
- syntax-highlighter
|
|
|
|
|
- github-proxy
|
|
|
|
|
- zoekt-index-0
|
|
|
|
|
- zoekt-index-1
|
|
|
|
|
- zoekt-web-0
|
|
|
|
|
- zoekt-web-1
|
|
|
|
|
- blobstore
|
|
|
|
|
|
2022-06-10 06:28:27 +00:00
|
|
|
iam:
|
2021-09-29 14:22:16 +00:00
|
|
|
requiresDevPrivate: true
|
2021-07-23 07:59:43 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
2022-02-12 18:36:28 +00:00
|
|
|
- git
|
2021-07-23 07:59:43 +00:00
|
|
|
commands:
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- repo-updater
|
|
|
|
|
- web
|
2022-10-10 13:26:58 +00:00
|
|
|
- gitserver-0
|
|
|
|
|
- gitserver-1
|
2021-07-23 07:59:43 +00:00
|
|
|
- caddy
|
|
|
|
|
- github-proxy
|
|
|
|
|
|
2021-08-02 08:25:39 +00:00
|
|
|
monitoring:
|
2021-08-09 10:30:54 +00:00
|
|
|
checks:
|
|
|
|
|
- docker
|
2021-08-02 08:25:39 +00:00
|
|
|
commands:
|
|
|
|
|
- jaeger
|
2022-08-29 16:42:36 +00:00
|
|
|
- otel-collector
|
2021-08-02 08:25:39 +00:00
|
|
|
- prometheus
|
|
|
|
|
- grafana
|
|
|
|
|
- postgres_exporter
|
2021-08-13 17:50:27 +00:00
|
|
|
- monitoring-generator
|
2021-08-02 08:25:39 +00:00
|
|
|
|
2021-08-09 10:30:54 +00:00
|
|
|
monitoring-alerts:
|
|
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
- redis
|
|
|
|
|
- postgres
|
|
|
|
|
commands:
|
|
|
|
|
- prometheus
|
|
|
|
|
- grafana
|
|
|
|
|
# For generated alerts docs
|
|
|
|
|
- docsite
|
|
|
|
|
# For the alerting integration with frontend
|
2022-06-20 17:57:54 +00:00
|
|
|
- frontend
|
|
|
|
|
- web
|
2021-08-09 10:30:54 +00:00
|
|
|
- caddy
|
|
|
|
|
|
2021-10-12 10:44:39 +00:00
|
|
|
web-standalone:
|
|
|
|
|
commands:
|
2021-10-22 10:30:29 +00:00
|
|
|
- web-standalone-http
|
2021-10-12 10:44:39 +00:00
|
|
|
- caddy
|
|
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
|
|
|
|
|
oss-web-standalone:
|
|
|
|
|
commands:
|
2021-10-22 10:30:29 +00:00
|
|
|
- web-standalone-http
|
2021-10-12 10:44:39 +00:00
|
|
|
- caddy
|
|
|
|
|
|
|
|
|
|
web-standalone-prod:
|
|
|
|
|
commands:
|
2021-10-22 10:30:29 +00:00
|
|
|
- web-standalone-http-prod
|
2021-10-12 10:44:39 +00:00
|
|
|
- caddy
|
|
|
|
|
env:
|
|
|
|
|
ENTERPRISE: 1
|
|
|
|
|
|
|
|
|
|
oss-web-standalone-prod:
|
|
|
|
|
commands:
|
2021-10-22 10:30:29 +00:00
|
|
|
- web-standalone-http-prod
|
2021-10-12 10:44:39 +00:00
|
|
|
- caddy
|
|
|
|
|
|
2022-07-26 14:51:04 +00:00
|
|
|
# For testing our OpenTelemetry stack
|
2022-07-05 18:28:15 +00:00
|
|
|
otel:
|
|
|
|
|
checks:
|
|
|
|
|
- docker
|
|
|
|
|
commands:
|
|
|
|
|
- otel-collector
|
|
|
|
|
- jaeger
|
|
|
|
|
|
2023-01-30 17:43:20 +00:00
|
|
|
app:
|
2023-01-20 00:35:39 +00:00
|
|
|
requiresDevPrivate: true
|
|
|
|
|
checks:
|
|
|
|
|
- git
|
|
|
|
|
commands:
|
|
|
|
|
- sourcegraph
|
|
|
|
|
- docsite
|
|
|
|
|
- web
|
|
|
|
|
- caddy
|
2023-03-14 16:22:09 +00:00
|
|
|
env:
|
|
|
|
|
DISABLE_CODE_INSIGHTS: false
|
2023-01-20 00:35:39 +00:00
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
tests:
|
|
|
|
|
# These can be run with `sg test [name]`
|
|
|
|
|
backend:
|
2021-05-10 08:28:45 +00:00
|
|
|
cmd: go test
|
|
|
|
|
defaultArgs: ./...
|
|
|
|
|
|
2021-03-29 08:14:53 +00:00
|
|
|
backend-integration:
|
|
|
|
|
cmd: cd dev/gqltest && go test -long -base-url $BASE_URL -email $EMAIL -username $USERNAME -password $PASSWORD ./gqltest
|
|
|
|
|
env:
|
|
|
|
|
# These are defaults. They can be overwritten by setting the env vars when
|
|
|
|
|
# running the command.
|
2021-05-31 07:51:39 +00:00
|
|
|
BASE_URL: 'http://localhost:3080'
|
|
|
|
|
EMAIL: 'joe@sourcegraph.com'
|
|
|
|
|
PASSWORD: '12345'
|
2021-05-10 08:28:45 +00:00
|
|
|
|
2021-11-10 09:26:15 +00:00
|
|
|
bext:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/browser test
|
2021-11-10 09:26:15 +00:00
|
|
|
|
2021-11-25 12:46:36 +00:00
|
|
|
bext-build:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: EXTENSION_PERMISSIONS_ALL_URLS=true pnpm --filter @sourcegraph/browser build
|
2021-11-25 12:46:36 +00:00
|
|
|
|
2021-11-10 09:26:15 +00:00
|
|
|
bext-integration:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/browser test-integration
|
2021-11-10 09:26:15 +00:00
|
|
|
|
|
|
|
|
bext-e2e:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm --filter @sourcegraph/browser mocha ./src/end-to-end/github.test.ts ./src/end-to-end/gitlab.test.ts
|
2021-11-10 09:26:15 +00:00
|
|
|
env:
|
|
|
|
|
SOURCEGRAPH_BASE_URL: https://sourcegraph.com
|
|
|
|
|
|
2022-08-24 11:41:51 +00:00
|
|
|
client:
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm jest --testPathIgnorePatterns end-to-end regression integration storybook
|
2021-05-10 08:28:45 +00:00
|
|
|
|
2022-09-09 09:36:10 +00:00
|
|
|
docsite:
|
|
|
|
|
cmd: .bin/docsite_${DOCSITE_VERSION} check ./doc
|
|
|
|
|
env:
|
2023-03-28 10:21:52 +00:00
|
|
|
DOCSITE_VERSION: v1.9.2 # Update DOCSITE_VERSION everywhere in all places (including outside this repo)
|
2022-09-09 09:36:10 +00:00
|
|
|
|
|
|
|
|
web-e2e:
|
2022-05-04 07:12:30 +00:00
|
|
|
preamble: |
|
2022-07-06 21:38:47 +00:00
|
|
|
A Sourcegraph instance must be already running for these tests to work, most
|
2022-08-24 11:41:51 +00:00
|
|
|
commonly with: `sg start enterprise-e2e`
|
2022-05-04 07:12:30 +00:00
|
|
|
|
2022-08-24 11:41:51 +00:00
|
|
|
See more details: https://docs.sourcegraph.com/dev/how-to/testing#running-end-to-end-tests
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm test-e2e
|
2022-05-04 07:12:30 +00:00
|
|
|
env:
|
|
|
|
|
TEST_USER_EMAIL: test@sourcegraph.com
|
|
|
|
|
TEST_USER_PASSWORD: supersecurepassword
|
|
|
|
|
SOURCEGRAPH_BASE_URL: https://sourcegraph.test:3443
|
|
|
|
|
BROWSER: chrome
|
2022-05-05 16:17:18 +00:00
|
|
|
external_secrets:
|
2022-05-04 07:12:30 +00:00
|
|
|
GH_TOKEN:
|
2022-07-14 13:05:49 +00:00
|
|
|
project: 'sourcegraph-ci'
|
|
|
|
|
name: 'BUILDKITE_GITHUBDOTCOM_TOKEN'
|
2021-08-05 08:19:45 +00:00
|
|
|
|
2022-09-09 09:36:10 +00:00
|
|
|
web-regression:
|
2022-08-24 11:41:51 +00:00
|
|
|
preamble: |
|
|
|
|
|
A Sourcegraph instance must be already running for these tests to work, most
|
|
|
|
|
commonly with: `sg start enterprise-e2e`
|
|
|
|
|
|
|
|
|
|
See more details: https://docs.sourcegraph.com/dev/how-to/testing#running-regression-tests
|
|
|
|
|
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm test-regression
|
2022-08-24 11:41:51 +00:00
|
|
|
env:
|
|
|
|
|
SOURCEGRAPH_SUDO_USER: test
|
|
|
|
|
SOURCEGRAPH_BASE_URL: https://sourcegraph.test:3443
|
|
|
|
|
TEST_USER_PASSWORD: supersecurepassword
|
|
|
|
|
BROWSER: chrome
|
|
|
|
|
|
2022-09-09 09:36:10 +00:00
|
|
|
web-integration:
|
|
|
|
|
preamble: |
|
|
|
|
|
A web application should be built for these tests to work, most
|
2023-01-30 06:51:24 +00:00
|
|
|
commonly with: `sg run web-integration-build` or `sg run web-integration-build-prod` for production build.
|
2022-09-09 09:36:10 +00:00
|
|
|
|
|
|
|
|
See more details: https://docs.sourcegraph.com/dev/how-to/testing#running-integration-tests
|
|
|
|
|
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm test-integration
|
2022-09-09 09:36:10 +00:00
|
|
|
|
|
|
|
|
web-integration:debug:
|
|
|
|
|
preamble: |
|
|
|
|
|
A Sourcegraph instance must be already running for these tests to work, most
|
|
|
|
|
commonly with: `sg start web-standalone`
|
|
|
|
|
|
|
|
|
|
See more details: https://docs.sourcegraph.com/dev/how-to/testing#running-integration-tests
|
|
|
|
|
|
2023-01-12 03:50:09 +00:00
|
|
|
cmd: pnpm test-integration:debug
|