fix(profiling): Ingest profile file path (#4060)

* fix(profiling): Ingest profile file path

`ingest-profiles` is now using vroomrs to ingest profiles instead of writing
through vroom. For self-hosted, we need to make sure filestore for profiles is
properly configured so vroom can find the ingested profiles.

* feat: move profiling data to seaweedfs

* feat: review from Sentry

* Apply suggestions from code review

Co-authored-by: Burak Yigit Kaya <byk@sentry.io>

* ref: volume migration tests

* ref: execute file creation from vroom container

* fix: brainfart

* debug

* hack

* more debug

* now I know what I'm missing out

* explicitly state feature complete

* try to pull vroom image

* should only run when COMPOSE_PROFILES is feature complete

* using run invoked weed instead of empty shell

* execute the upload script from vroom container

* execute apt command as root

* gonna sleep

* missing endgroup

* missing sh

* directly execute s3cmd and do 'wc' outside of the container

* why did other test start failing

* manual cleanup

* vroom is not a persistent volume

* what about not removing the seaweed volume

---------

Co-authored-by: Reinaldy Rafli <github@reinaldyrafli.com>
Co-authored-by: Burak Yigit Kaya <byk@sentry.io>
This commit is contained in:
Tony Xiao 2025-11-28 02:30:28 -05:00 committed by GitHub
parent 5a670d1c5f
commit 84ebc583de
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 172 additions and 2 deletions

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Integration test for install/bootstrap-s3-profiles.sh:
# seeds the `sentry-vroom` volume with 1000 files, runs the bootstrap script
# several times in a row, and checks that exactly 1000 objects are listed in
# the SeaweedFS `profiles` bucket afterwards.

source _unit-test/_test_setup.sh
source install/dc-detect-version.sh
source install/create-docker-volumes.sh
source install/ensure-files-from-examples.sh

# The bootstrap script is a no-op unless COMPOSE_PROFILES is "feature-complete".
export COMPOSE_PROFILES="feature-complete"
$dc pull vroom

source install/ensure-correct-permissions-profiles-dir.sh

# Seed the `sentry-vroom` volume with 1000 small files, written from a one-off
# vroom container that has the volume mounted.
$dc run --rm --no-deps \
  -v sentry-vroom:/var/vroom/sentry-profiles \
  --entrypoint /bin/bash \
  vroom -c '
for i in $(seq 1 1000); do
echo This is test file $i > /var/vroom/sentry-profiles/test_file_$i.txt
done
'

# Opt in to automatic config updates so the bootstrap script never prompts.
export APPLY_AUTOMATIC_CONFIG_UPDATES=1

# Run the bootstrap five times: repeated runs must not break anything.
for _attempt in {1..5}; do
  source install/bootstrap-s3-profiles.sh
done

# Count the objects listed in the bucket; `wc` runs on the host side of the pipe.
s3_list_flags=(
  --access_key=sentry
  --secret_key=sentry
  --no-ssl
  --region=us-east-1
  --host=seaweedfs:8333
  "--host-bucket=seaweedfs:8333/%(bucket)"
)
migrated_files_count=$($dc exec seaweedfs s3cmd "${s3_list_flags[@]}" ls s3://profiles/ | wc -l)

[[ "$migrated_files_count" -eq 1000 ]] || {
  echo "Error: Expected 1000 migrated files, but found $migrated_files_count"
  exit 1
}

# Manual cleanup, otherwise `create-docker-volumes.sh` will fail
$dc down -v --remove-orphans

report_success

View File

@ -752,7 +752,9 @@ services:
environment:
SENTRY_KAFKA_BROKERS_PROFILING: "kafka:9092"
SENTRY_KAFKA_BROKERS_OCCURRENCES: "kafka:9092"
SENTRY_BUCKET_PROFILES: file:///var/vroom/sentry-profiles
# Profiles now live in the `profiles` bucket on the SeaweedFS S3 endpoint
# (seaweedfs:8333), using path-style addressing with SSL disabled.
# NOTE(review): in this rendered diff the `file://` entry above looks like the
# value this line replaces - confirm only one SENTRY_BUCKET_PROFILES key
# remains in the real compose file.
SENTRY_BUCKET_PROFILES: "s3://profiles?region=us-east-1&endpoint=seaweedfs:8333&s3ForcePathStyle=true&disableSSL=true"
# Credentials for that endpoint; the same values are passed to s3cmd in
# install/bootstrap-s3-profiles.sh.
AWS_ACCESS_KEY: "sentry"
AWS_SECRET_KEY: "sentry"
SENTRY_SNUBA_HOST: "http://snuba-api:1218"
volumes:
# install/bootstrap-s3-profiles.sh reads legacy profile files from this mount
# (through the vroom container) when migrating them into the bucket.
- sentry-vroom:/var/vroom/sentry-profiles

View File

@ -40,6 +40,7 @@ source install/bootstrap-s3-nodestore.sh
source install/bootstrap-snuba.sh
source install/upgrade-postgres.sh
source install/ensure-correct-permissions-profiles-dir.sh
# Creates the SeaweedFS "profiles" bucket and, on first creation, copies any
# files found on the `sentry-vroom` volume into it. The script only acts when
# COMPOSE_PROFILES is "feature-complete".
source install/bootstrap-s3-profiles.sh
source install/set-up-and-migrate-database.sh
source install/migrate-pgbouncer.sh
source install/geoip.sh

View File

@ -1,6 +1,6 @@
echo "${_group}Bootstrapping seaweedfs (node store)..."
# NOTE(review): the next two lines both bring up seaweedfs and postgres. In this
# rendered diff they are most likely the old and the new form of the same
# step - confirm only `start_service_and_wait_ready` remains in the real file.
$dc up --wait seaweedfs postgres
start_service_and_wait_ready seaweedfs postgres
# Install s3cmd inside the seaweedfs container, forwarding the host's proxy
# settings (empty when unset) so `apk` can reach the package mirror.
$dc exec -e "HTTP_PROXY=${HTTP_PROXY:-}" -e "HTTPS_PROXY=${HTTPS_PROXY:-}" -e "NO_PROXY=${NO_PROXY:-}" -e "http_proxy=${http_proxy:-}" -e "https_proxy=${https_proxy:-}" -e "no_proxy=${no_proxy:-}" seaweedfs apk add --no-cache s3cmd
$dc exec seaweedfs mkdir -p /data/idx/
# Command prefix for running s3cmd inside the seaweedfs container; expanded
# unquoted so that it word-splits into the full command line.
s3cmd="$dc exec seaweedfs s3cmd"

View File

@ -0,0 +1,118 @@
# Bootstraps the SeaweedFS "profiles" bucket so that the `sentry`-based
# containers and `vroom` use the same bucket for profiling data.
#
# Before 25.10.0, profiling data was stored on the `sentry-vroom` volume. Since
# that version `vroomrs` ingests profiles directly, so both services must share
# the same storage. At the time of this writing they cannot share the
# `sentry-vroom` volume, because its ownership is set to `vroom:vroom`, which
# prevents the `sentry`-based containers from reading or writing it.
#
# Therefore, this script does the following:
#   1. Check if there are any files inside the `sentry-vroom` volume.
#   2. If (1) finds files, copy those files into a "profiles" bucket on SeaweedFS.
#   3. Point `filestore-profiles` and vroom to the SeaweedFS "profiles" bucket.
#
# Meant to be sourced by the installer. Reads $dc, $_group, $_endgroup,
# $SENTRY_CONFIG_YML, $SENTRY_EVENT_RETENTION_DAYS, $COMPOSE_PROFILES,
# $APPLY_AUTOMATIC_CONFIG_UPDATES and the proxy variables; calls
# `start_service_and_wait_ready`.
#
# Should only run when `$COMPOSE_PROFILES` is set to `feature-complete`
if [[ "${COMPOSE_PROFILES:-}" == "feature-complete" ]]; then
  echo "${_group}Bootstrapping seaweedfs (profiles)..."

  # Proxy settings forwarded into containers that need to download packages.
  # Each one defaults to empty so an unset variable never aborts the script.
  profiles_proxy_env=(
    -e "HTTP_PROXY=${HTTP_PROXY:-}" -e "HTTPS_PROXY=${HTTPS_PROXY:-}" -e "NO_PROXY=${NO_PROXY:-}"
    -e "http_proxy=${http_proxy:-}" -e "https_proxy=${https_proxy:-}" -e "no_proxy=${no_proxy:-}"
  )
  # Connection flags for every s3cmd call executed inside the seaweedfs container.
  profiles_s3_flags=(
    --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1
    --host=localhost:8333 '--host-bucket=localhost:8333/%(bucket)'
  )

  start_service_and_wait_ready seaweedfs
  $dc exec "${profiles_proxy_env[@]}" seaweedfs apk add --no-cache s3cmd

  # Command prefix; expanded unquoted on purpose so it word-splits.
  s3cmd="$dc exec seaweedfs s3cmd"

  bucket_list=$($s3cmd "${profiles_s3_flags[@]}" ls)

  # Look for the bucket on ANY line of the listing, not just the last one, so
  # that other buckets sorting after "profiles" cannot hide it. Carriage
  # returns are stripped in case the output went through a TTY.
  if ! awk '$3 == "s3://profiles" { found = 1 } END { exit !found }' <<<"${bucket_list//$'\r'/}"; then
    apply_config_changes_profiles=0

    # Only touch if no existing profiles config is found
    if ! grep -q "filestore.profiles-backend" "$SENTRY_CONFIG_YML"; then
      if [[ -z "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" ]]; then
        echo
        echo "We are migrating the Profiles data directory from the 'sentry-vroom' volume to SeaweedFS."
        echo "This migration will ensure profiles ingestion works correctly with the new 'vroomrs'"
        echo "and allows both 'sentry' and 'vroom' to transition smoothly."
        echo "To complete this, your sentry/config.yml file needs to be modified."
        echo "Would you like us to perform this modification automatically?"
        echo

        # Keep asking until a recognised answer is given.
        yn=""
        until [[ -n "$yn" ]]; do
          read -r -p "y or n? " yn
          case $yn in
          y | yes | 1)
            export apply_config_changes_profiles=1
            echo
            echo -n "Thank you."
            ;;
          n | no | 0)
            export apply_config_changes_profiles=0
            echo
            echo -n "Alright, you will need to update your sentry/config.yml file manually before running 'docker compose up'."
            ;;
          *) yn="" ;;
          esac
        done

        echo
        echo "To avoid this prompt in the future, use one of these flags:"
        echo
        echo "  --apply-automatic-config-updates"
        echo "  --no-apply-automatic-config-updates"
        echo
        echo "or set the APPLY_AUTOMATIC_CONFIG_UPDATES environment variable:"
        echo
        echo "  APPLY_AUTOMATIC_CONFIG_UPDATES=1 to apply automatic updates"
        echo "  APPLY_AUTOMATIC_CONFIG_UPDATES=0 to not apply automatic updates"
        echo
        sleep 5
      fi

      if [[ "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" == 1 || "$apply_config_changes_profiles" == 1 ]]; then
        # Copy the profiles filestore block verbatim from the example config.
        profiles_config=$(sed -n '/filestore.profiles-backend/,/s3v4"/{p}' sentry/config.example.yml)
        # If the config does not end with a newline, add one first so the
        # appended block cannot fuse with the last existing line.
        if [[ -s "$SENTRY_CONFIG_YML" && -n "$(tail -c 1 "$SENTRY_CONFIG_YML")" ]]; then
          echo >>"$SENTRY_CONFIG_YML"
        fi
        printf '%s\n' "$profiles_config" >>"$SENTRY_CONFIG_YML"
      fi
    fi

    $s3cmd "${profiles_s3_flags[@]}" mb s3://profiles

    # Check if there are files in the sentry-vroom volume
    start_service_and_wait_ready vroom
    vroom_files_count=$($dc exec vroom sh -c "find /var/vroom/sentry-profiles -type f | wc -l")
    # Keep digits only (drops padding or a stray carriage return) so the
    # arithmetic comparison below cannot choke on the value.
    vroom_files_count=${vroom_files_count//[^0-9]/}

    if [[ "${vroom_files_count:-0}" -gt 0 ]]; then
      echo "Migrating $vroom_files_count files from 'sentry-vroom' volume to 'profiles' bucket on SeaweedFS..."

      # Install s3cmd (as root) inside the running vroom container, then sync
      # the volume contents to SeaweedFS from there. Inside that container the
      # S3 endpoint is reached as seaweedfs:8333 rather than localhost.
      $dc exec "${profiles_proxy_env[@]}" -u root vroom sh -c 'mkdir -p /var/lib/apt/lists/partial && apt-get update && apt-get install -y --no-install-recommends s3cmd'
      $dc exec vroom sh -c 's3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=seaweedfs:8333 --host-bucket="seaweedfs:8333/%(bucket)" sync /var/vroom/sentry-profiles/ s3://profiles/'

      echo "Migration completed."
    else
      echo "No files found in 'sentry-vroom' volume. Skipping files migration."
    fi
  else
    echo "'profiles' bucket already exists on SeaweedFS. Skipping creation."
  fi

  # The lifecycle policy is (re)applied when automatic updates are enabled or
  # when no preference was given at all.
  if [[ -z "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" || "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" == 1 ]]; then
    # Expire objects after $SENTRY_EVENT_RETENTION_DAYS days.
    lifecycle_policy=$(
      cat <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<LifecycleConfiguration>
<Rule>
<ID>Sentry-Profiles-Rule</ID>
<Status>Enabled</Status>
<Filter></Filter>
<Expiration>
<Days>$SENTRY_EVENT_RETENTION_DAYS</Days>
</Expiration>
</Rule>
</LifecycleConfiguration>
EOF
    )

    # The policy is embedded between single quotes in the remote command, so
    # it must never contain a single quote itself.
    $dc exec seaweedfs sh -c "printf '%s' '$lifecycle_policy' > /tmp/profiles-lifecycle-policy.xml"
    $s3cmd "${profiles_s3_flags[@]}" setlifecycle /tmp/profiles-lifecycle-policy.xml s3://profiles

    echo "Making sure the bucket lifecycle policy is all set up correctly..."
    $s3cmd "${profiles_s3_flags[@]}" getlifecycle s3://profiles
  fi

  echo "${_endgroup}"
fi

View File

@ -96,6 +96,18 @@ releasefile.cache-path: '/data/releasefile-cache'
# secret_key: 'XXXXXXX'
# bucket_name: 's3-bucket-name'
# Profiles storage: an S3-compatible bucket named "profiles", served by
# SeaweedFS at http://seaweedfs:8333 with path-style addressing.
# install/bootstrap-s3-profiles.sh copies this block (from the backend key
# through the signature_version line) into sentry/config.yml using sed, so
# keep the block contiguous and keep explanatory comments outside of it.
filestore.profiles-backend: 's3'
filestore.profiles-options:
bucket_acl: "private"
default_acl: "private"
access_key: "sentry"
secret_key: "sentry"
bucket_name: "profiles"
region_name: "us-east-1"
endpoint_url: "http://seaweedfs:8333"
addressing_style: "path"
signature_version: "s3v4"
symbolicator.enabled: true
symbolicator.options:
url: "http://symbolicator:3021"