mirror of
https://github.com/web-arena-x/webarena.git
synced 2026-02-06 11:16:53 +00:00
Increase OSRM container memory from 1GB to 4GB to prevent OOM crashes. Add --strip-components=5 to tar extraction to fix nested volume directories.
315 lines
12 KiB
YAML
315 lines
12 KiB
YAML
#cloud-config
# WebArena Map Backend Server Boot-Init Script
# Based on successful deployment from trajectory analysis
# This script sets up tile server, geocoding server, and routing servers

# Refresh the package index, but do not upgrade or reboot during first boot
package_update: true
package_upgrade: false
package_reboot_if_required: false

# Configure APT with retry logic and better error handling.
# apt.conf documents the retry/timeout settings under the top-level
# "Acquire" group (Acquire::Retries, Acquire::http::Timeout, ...); the same
# names nested under "APT::" are different keys.
apt:
  conf: |
    Acquire::Retries "3";
    Acquire::http::Timeout "30";
    Acquire::https::Timeout "30";
    Dpkg::Options {
      "--force-confdef";
      "--force-confold";
    };

# Packages installed before runcmd executes
packages:
- docker.io
- curl
- wget
- htop
- unzip
|
|
|
|
# Provision a swap file so memory-intensive operations have headroom
bootcmd:
- |
  # 4GB swap file for the large data extractions (reduced from 8GB to save space).
  # Nothing is done when /swapfile is already present.
  test -f /swapfile || {
    fallocate -l 4G /swapfile
    chmod 600 /swapfile
    mkswap /swapfile
    swapon /swapfile
    # Register the swap file in fstab
    echo '/swapfile none swap sw 0 0' >> /etc/fstab
  }
|
|
|
|
runcmd:
# Wait for package locks to be released (poll every 5 seconds)
- while fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do echo "Waiting for dpkg lock..."; sleep 5; done
- while fuser /var/lib/apt/lists/lock >/dev/null 2>&1; do echo "Waiting for apt lock..."; sleep 5; done

# Enable and start Docker, then pause briefly before it is used
- systemctl enable docker
- systemctl start docker
- sleep 10

# Add ubuntu user to docker group
- usermod -aG docker ubuntu

# Create necessary directories
- mkdir -p /opt/osm_dump /opt/osrm /var/lib/docker/volumes
- mkdir -p /root/logs

# Install AWS CLI v2 (awscli package not available in Ubuntu 24.04).
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as the zip; --retry covers transient network failures.
- curl --fail --silent --show-error --location --retry 5 --retry-delay 10 "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip
# -o overwrites leftovers from an earlier attempt without prompting
- unzip -q -o /tmp/awscliv2.zip -d /tmp/
- /tmp/aws/install
- rm -rf /tmp/awscliv2.zip /tmp/aws

# Configure AWS CLI for S3 access (no credentials needed for public buckets)
- mkdir -p /root/.aws
- |
  cat > /root/.aws/config << 'EOF'
  [default]
  region = us-east-2
  output = json
  EOF
|
|
|
|
# Create a comprehensive bootstrap script that runs in background
|
|
- |
|
|
cat > /root/bootstrap.sh << 'EOF'
|
|
#!/bin/bash
# Bootstrap for the WebArena map backend: abort on the first failing
# command, on use of an unset variable, or on a failing pipeline stage.
set -euo pipefail
# From here on, copy stdout and stderr to the bootstrap log (append mode).
exec > >(tee -a /var/log/webarena-map-bootstrap.log) 2>&1

echo "$(date): Starting WebArena map server bootstrap"
echo "$(date): System info: $(uname -a)"
echo "$(date): Available memory: $(free -h)"
echo "$(date): Available disk space: $(df -h)"

# Check if we have enough disk space before downloading anything.
# -P keeps every filesystem on a single output line and -k pins the unit to
# 1K blocks, so row 2 / column 4 is always the free space of / in KiB.
readonly MIN_FREE_GB=200
AVAILABLE_GB=$(df -Pk / | awk 'NR==2 {print int($4/1024/1024)}')
echo "$(date): Available disk space: ${AVAILABLE_GB}GB"
if [ "$AVAILABLE_GB" -lt "$MIN_FREE_GB" ]; then
  echo "$(date): ERROR: Insufficient disk space. Need at least ${MIN_FREE_GB}GB, have ${AVAILABLE_GB}GB"
  exit 1
fi
|
|
|
|
# Retry a command with exponential backoff.
# Usage: retry <command> [args...]
# Environment (optional):
#   RETRY_MAX_ATTEMPTS   total number of attempts before giving up (default 5)
#   RETRY_INITIAL_DELAY  seconds to wait after the first failure (default 30);
#                        the wait doubles after every further failure
# Returns 0 as soon as the command succeeds, 1 once every attempt has failed.
retry() {
  local attempt=1
  local max=${RETRY_MAX_ATTEMPTS:-5}
  local delay=${RETRY_INITIAL_DELAY:-30}

  # The command is the loop condition, so its failure does not trip `set -e`
  # and its status cannot be confused with that of the handling code.
  until "$@"; do
    if (( attempt >= max )); then
      echo "$(date): Command failed after $attempt attempts: $*"
      return 1
    fi
    attempt=$((attempt + 1))
    echo "$(date): Command failed. Attempt $attempt/$max. Waiting ${delay}s..."
    sleep "$delay"
    delay=$((delay * 2)) # Exponential backoff
  done
}
|
|
|
|
# Wait for a background job and report how it ended.
# Arguments:
#   $1 - PID of a background job started by this shell
#   $2 - human-readable description used in the log lines
#   $3 - (optional) seconds between "still running" messages, default 60
# Returns 0 if the job succeeded, otherwise the job's own exit status.
monitor_extraction() {
  local pid=$1
  local desc=$2
  local interval=${3:-60}
  local exit_code=0

  echo "$(date): Monitoring $desc (PID: $pid)"
  # kill -0 only tests whether the process still exists
  while kill -0 "$pid" 2>/dev/null; do
    echo "$(date): $desc still running..."
    sleep "$interval"
  done

  # Capture the status through `||`: a bare failing `wait` would make
  # `set -e` end the script before the failure could be logged below.
  wait "$pid" || exit_code=$?

  if [ "$exit_code" -eq 0 ]; then
    echo "$(date): ✅ $desc completed successfully"
  else
    echo "$(date): ❌ $desc failed with exit code $exit_code"
    return "$exit_code"
  fi
}
|
|
|
|
# Fetch the four data archives from the public S3 bucket.
echo "$(date): Starting data downloads..."

# One row per archive: file stem, start-up message, label for the monitor.
# The transfers are independent of each other, so they are all started in
# the background first and only then waited for, in the same order.
archive_stems=(osm_tile_server nominatim_volumes osm_dump osrm_routing)
archive_start_msgs=(
  "Downloading OSM tile server data..."
  "Downloading Nominatim data..."
  "Downloading OSM dump..."
  "Downloading OSRM routing data..."
)
archive_labels=(
  "OSM tile server download"
  "Nominatim download"
  "OSM dump download"
  "OSRM routing download"
)
archive_pids=()

for idx in "${!archive_stems[@]}"; do
  echo "$(date): ${archive_start_msgs[idx]}"
  retry aws s3 cp --no-sign-request "s3://webarena-map-server-data/${archive_stems[idx]}.tar" "/root/${archive_stems[idx]}.tar" &
  archive_pids+=("$!")
done

# Block until every transfer has finished, logging progress meanwhile
echo "$(date): Waiting for downloads to complete..."
for idx in "${!archive_pids[@]}"; do
  monitor_extraction "${archive_pids[idx]}" "${archive_labels[idx]}"
done

echo "$(date): All downloads completed. Starting extractions..."
|
|
|
|
# Extract files sequentially to avoid memory issues and clean up immediately
# Note: Using --strip-components=5 to remove nested 'projects/ogma3/docker/volumes/' prefix

# Unpack one archive, then delete it right away to free disk space.
# Arguments:
#   $1 - description used in the log lines
#   $2 - path of the tar archive
#   $3 - directory to extract into
#   $4.. - (optional) extra tar options, e.g. --strip-components=5
extract_and_remove() {
  local desc=$1
  local archive=$2
  local dest=$3
  shift 3

  echo "$(date): Extracting $desc..."
  tar -C "$dest" "$@" -xf "$archive"
  rm -f "$archive" # Clean up immediately to save space
  echo "$(date): ✅ $desc extracted and cleaned up"
}

extract_and_remove "OSM tile server data" /root/osm_tile_server.tar /var/lib/docker/volumes --strip-components=5
extract_and_remove "Nominatim data" /root/nominatim_volumes.tar /var/lib/docker/volumes --strip-components=5
extract_and_remove "OSM dump" /root/osm_dump.tar /opt/osm_dump
extract_and_remove "OSRM routing data" /root/osrm_routing.tar /opt/osrm

# Verify extracted data.
# The listings are informational only: with `set -eo pipefail` active, an
# `ls` that fails or is cut short by `head` would otherwise abort the
# whole bootstrap, hence the `|| true`.
echo "$(date): Verifying extracted data..."
ls -la /var/lib/docker/volumes/ | head -20 || true
ls -la /opt/osm_dump/ | head -10 || true
ls -la /opt/osrm/ | head -10 || true
|
|
|
|
# Pull Docker images. Each pull goes through retry so that a transient
# registry or network error does not end the bootstrap under `set -e`.
echo "$(date): Pulling Docker images..."
retry docker pull overv/openstreetmap-tile-server
retry docker pull mediagis/nominatim:4.2
retry docker pull ghcr.io/project-osrm/osrm-backend:v5.27.1

# Start containers with restart policies and proper resource limits
echo "$(date): Starting tile server..."
docker run --name tile --restart unless-stopped \
  --memory=2g --memory-swap=4g \
  --volume=osm-data:/data/database/ --volume=osm-tiles:/data/tiles/ \
  -p 8080:80 -d overv/openstreetmap-tile-server run

# Wait a bit for tile server to initialize
sleep 30

echo "$(date): Starting Nominatim geocoding server..."
docker run --name nominatim --restart unless-stopped \
  --memory=4g --memory-swap=8g \
  --env=IMPORT_STYLE=extratags \
  --env=PBF_PATH=/nominatim/data/us-northeast-latest.osm.pbf \
  --env=IMPORT_WIKIPEDIA=/nominatim/data/wikimedia-importance.sql.gz \
  --volume=/opt/osm_dump:/nominatim/data \
  --volume=nominatim-data:/var/lib/postgresql/14/main \
  --volume=nominatim-flatnode:/nominatim/flatnode \
  -p 8085:8080 -d mediagis/nominatim:4.2 /app/start.sh

# Wait for Nominatim to initialize
sleep 60

echo "$(date): Starting OSRM routing servers..."

# One OSRM container per routing profile. The three differ only in the
# container name, the data directory under /opt/osrm and the published
# host port (car 5000, bike 5001, foot 5002).
for profile_port in car:5000 bike:5001 foot:5002; do
  profile=${profile_port%%:*}
  host_port=${profile_port##*:}
  docker run --name "osrm-${profile}" --restart unless-stopped \
    --memory=4g --memory-swap=8g \
    --volume="/opt/osrm/${profile}:/data" -p "${host_port}:5000" -d \
    ghcr.io/project-osrm/osrm-backend:v5.27.1 osrm-routed --algorithm mld /data/us-northeast-latest.osrm
done

echo "$(date): All services started. Waiting for initialization..."
sleep 120
|
|
|
|
# Map an OSRM profile name to the host port its container publishes.
# Prints the port; returns 1 for an unknown profile.
osrm_port() {
  case "$1" in
    car) echo 5000 ;;
    bike) echo 5001 ;;
    foot) echo 5002 ;;
    *) return 1 ;;
  esac
}

# Probe one HTTP endpoint and log whether it answered successfully.
# Arguments: $1 - label used in the log line, $2 - URL to request
# The result is informational only; the function never fails the script.
# --max-time bounds each probe so an unresponsive service cannot stall it.
check_endpoint() {
  local label=$1
  local url=$2

  if curl -f -s -o /dev/null --max-time 10 "$url"; then
    echo "$(date): ✅ $label is responding"
  else
    echo "$(date): ❌ $label is not responding"
  fi
}

echo "$(date): Verifying service health..."
docker ps --format "table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}"

# Test service endpoints
echo "$(date): Testing service endpoints..."

# Test tile server
check_endpoint "Tile server" "http://localhost:8080/tile/0/0/0.png"

# Test Nominatim
check_endpoint "Nominatim" "http://localhost:8085/search?q=test&format=json&limit=1"

# Test OSRM services
for service in car bike foot; do
  port=$(osrm_port "$service")
  check_endpoint "OSRM $service routing" \
    "http://localhost:$port/route/v1/$service/-79.9959,40.4406;-79.9,40.45?overview=false"
done
|
|
|
|
# All tar files already cleaned up during extraction

# Final status report
echo "$(date): Bootstrap completed!"
echo "$(date): Final service status:"
docker ps
echo "$(date): Available disk space after cleanup:"
df -h
echo "$(date): Memory usage:"
free -h

echo "$(date): Services are available at:"
# 169.254.169.254 is the AWS Instance Metadata Service (IMDS) endpoint
# It provides instance metadata including the public IP address.
# The address is looked up once, with a short timeout. A session token is
# requested first so the lookup also works where IMDS only accepts
# token-authenticated (IMDSv2) requests; without a token the plain request
# is tried. `|| true` keeps a failed lookup from ending the script under
# `set -e`; a placeholder is printed instead of an empty host.
IMDS_BASE="http://169.254.169.254/latest"
IMDS_TOKEN=$(curl -sf --max-time 5 -X PUT "${IMDS_BASE}/api/token" \
  -H "X-aws-ec2-metadata-token-ttl-seconds: 60" || true)
if [ -n "$IMDS_TOKEN" ]; then
  PUBLIC_IP=$(curl -sf --max-time 5 \
    -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" \
    "${IMDS_BASE}/meta-data/public-ipv4" || true)
else
  PUBLIC_IP=$(curl -sf --max-time 5 "${IMDS_BASE}/meta-data/public-ipv4" || true)
fi
PUBLIC_IP=${PUBLIC_IP:-"<instance-ip>"}

echo " - Tile server: http://${PUBLIC_IP}:8080/tile/{z}/{x}/{y}.png"
echo " - Geocoding: http://${PUBLIC_IP}:8085/"
echo " - OSRM Car: http://${PUBLIC_IP}:5000/"
echo " - OSRM Bike: http://${PUBLIC_IP}:5001/"
echo " - OSRM Foot: http://${PUBLIC_IP}:5002/"

echo "$(date): Bootstrap script completed successfully!"
|
|
EOF
|
|
|
|
# Make bootstrap script executable and run it in background.
# bootstrap.sh already tees all of its output into
# /var/log/webarena-map-bootstrap.log in append mode, so the inherited
# stdout/stderr are discarded here. Pointing them at the same file through
# a second, truncating redirect would record every line twice and let the
# two writers overwrite each other.
- chmod +x /root/bootstrap.sh
- nohup /root/bootstrap.sh > /dev/null 2>&1 &
|
|
|
|
# Write a marker file recording that this user-data was processed.
# write_files stores `content` verbatim (no shell expansion), so a
# `$(date)` placeholder would end up in the file literally; the file's
# modification time serves as the timestamp instead.
write_files:
- path: /root/cloud-init-completed
  content: |
    Cloud-init user-data processed (see this file's mtime for the time)
    Bootstrap script is started in the background by runcmd
    Check /var/log/webarena-map-bootstrap.log for progress
  permissions: '0644'
|
|
|
|
final_message: |
|
|
WebArena map server cloud-init completed.
|
|
Bootstrap script is running in background.
|
|
Check /var/log/webarena-map-bootstrap.log for progress.
|
|
Services will be available at:
|
|
- Tiles: http://<instance-ip>:8080/tile/{z}/{x}/{y}.png
|
|
- Geocoding: http://<instance-ip>:8085/
|
|
- Routing: http://<instance-ip>:5000 (car), :5001 (bike), :5002 (foot)
|