The scripts were hardcoded to look for OpenSMTPD at a custom workspace path, but the Dockerfile installs it via apt to /usr/sbin/smtpd. This change adds fallback logic that checks multiple locations, in order: (1) the custom workspace path (for backward compatibility), (2) the system path /usr/sbin/smtpd, and (3) anywhere in PATH. It also adds graceful handling when OpenSMTPD is not installed: an informative message is logged instead of failing with "No such file or directory".
238 lines
7.2 KiB
Bash
238 lines
7.2 KiB
Bash
#!/bin/bash
#
# Enhanced health check script for Shopify AI App Builder container.
#
# Checks the chat service (port 4500) and, when an OpenSMTPD binary is
# present, reports on the SMTP ports (25, 587, 465).
# Provides detailed diagnostics for debugging.

set -e

# ANSI color escape sequences for output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Diagnostic log location.
DIAG_LOG_DIR="/var/log/shopify-ai"
DIAG_LOG_FILE="${DIAG_LOG_DIR}/healthcheck.log"

# Creating the log directory is best-effort: under `set -e` a bare failing
# mkdir (read-only filesystem, insufficient permissions) would abort the
# script before any service had been probed.
if ! mkdir -p "$DIAG_LOG_DIR" 2>/dev/null; then
  printf 'WARN: cannot create log directory %s; file logging may be unavailable\n' \
    "$DIAG_LOG_DIR" >&2
fi

# Health check logging
#
# Appends "[timestamp] [level] message" to $DIAG_LOG_FILE and prints the bare
# message to stdout (the stream captured for the Docker health check).
#
# Arguments:
#   $1   - severity label written to the log file (e.g. INFO, WARN, ERROR)
#   $2.. - message words; joined with single spaces
# Outputs:
#   the message on stdout; one line appended to $DIAG_LOG_FILE when writable
health_log() {
  local level="$1"
  shift
  local message="$*"
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S') || timestamp="unknown-time"

  # File logging is best-effort: the script runs under `set -e`, so an
  # unwritable log file must not abort the health check itself.
  {
    printf '[%s] [%s] %s\n' "$timestamp" "$level" "$message" >> "$DIAG_LOG_FILE"
  } 2>/dev/null || true

  # Log to stdout for Docker health check
  printf '%s\n' "$message"
}

# Port checking function
#
# Reports whether something is listening on a TCP/UDP port, using `ss` when
# present and `netstat` otherwise.
#
# Arguments:
#   $1 - port number
#   $2 - human-readable service name used in log messages
# Returns:
#   0 when a listener is found; 1 when none is found or when neither tool
#   is installed.
check_port() {
  local port="$1"
  local service="$2"
  local socket_tool

  health_log "INFO" "Checking ${service} on port ${port}..."

  # Pick the socket lister: ss is the modern tool, netstat the fallback.
  if command -v ss &>/dev/null; then
    socket_tool="ss"
  elif command -v netstat &>/dev/null; then
    socket_tool="netstat"
  else
    health_log "WARN" "Neither ss nor netstat available for port checking"
    return 1
  fi

  # Both tools accept the same -tuln flags; the trailing space in the
  # pattern keeps e.g. ":450" from matching ":4500".
  if ! "$socket_tool" -tuln 2>/dev/null | grep -q ":${port} "; then
    health_log "ERROR" "✗ ${service} is NOT listening on port ${port}"
    return 1
  fi

  health_log "INFO" "✓ ${service} is listening on port ${port}"
}

# HTTP endpoint checking function
#
# Issues a GET with curl and treats HTTP 200 or 302 as healthy.
#
# Arguments:
#   $1 - URL to request
#   $2 - human-readable service name used in log messages
#   $3 - time limit in seconds (default: 3)
# Returns:
#   0 when the endpoint answers 200 or 302; 1 otherwise (other status,
#   no response within the limit, or curl not installed).
check_http() {
  local url="$1"
  local service="$2"
  local timeout="${3:-3}"
  local status_code=""
  local -a request

  health_log "INFO" "Checking ${service} HTTP endpoint: ${url}"

  # Report a missing client for what it is instead of as a dead endpoint.
  if ! command -v curl &>/dev/null; then
    health_log "WARN" "curl command not available for HTTP check"
    return 1
  fi

  if command -v timeout &>/dev/null; then
    request=(timeout "${timeout}" curl -s -o /dev/null -w '%{http_code}' "${url}")
  else
    # No coreutils `timeout`: let curl enforce the limit itself.
    request=(curl -s -o /dev/null -w '%{http_code}' --max-time "${timeout}" "${url}")
  fi

  # curl prints "000" when no HTTP response was received and exits non-zero;
  # `|| true` keeps that from tripping `set -e`.
  status_code=$("${request[@]}" 2>/dev/null) || true

  case "${status_code}" in
    200 | 302)
      health_log "INFO" "✓ ${service} HTTP endpoint responding (HTTP 200/302)"
      return 0
      ;;
    *)
      # Include the observed status so a 5xx is not mistaken for a timeout.
      health_log "ERROR" "✗ ${service} HTTP endpoint NOT responding (HTTP ${status_code:-none}, timeout: ${timeout}s)"
      return 1
      ;;
  esac
}

# Process checking function
#
# Finds the process that owns the listener on a port and logs its PID,
# resident memory and accumulated CPU time.
#
# Arguments:
#   $1 - human-readable service name used in log messages
#   $2 - port the service is expected to listen on
# Returns:
#   0 when an owning PID was found; 1 otherwise.
check_process() {
  local service="$1"
  local port="$2"
  local pid=""

  health_log "INFO" "Checking ${service} process..."

  # Find process listening on port.
  # `ss -p` prints the owner as users:(("name",pid=123,fd=4)); extract the
  # digits after "pid=" from the first matching socket. Only the first is
  # used because dual-stack listeners show up once per address family.
  if command -v ss &>/dev/null; then
    pid=$(
      ss -tulnp 2>/dev/null \
        | awk -v needle=":${port} " '
            index($0, needle) && match($0, /pid=[0-9]+/) {
              print substr($0, RSTART + 4, RLENGTH - 4)
              exit
            }'
    ) || true
  fi

  # ss omits owner details when it lacks privileges; fall back to lsof.
  if [[ -z "$pid" ]] && command -v lsof &>/dev/null; then
    pid=$(lsof -ti ":${port}" 2>/dev/null | head -n 1) || true
  fi

  if [[ -z "$pid" ]]; then
    health_log "ERROR" "✗ ${service} process NOT found"
    return 1
  fi

  health_log "INFO" "✓ ${service} process running (PID: ${pid})"

  # Check process memory and CPU usage
  if [[ -f "/proc/${pid}/status" ]]; then
    local mem_mb cpu_time clk_tck

    # VmRSS is reported in kB.
    mem_mb=$(awk '/^VmRSS:/ {printf "%.2f MB", $2/1024}' \
      "/proc/${pid}/status" 2>/dev/null) || true
    health_log "INFO" " Memory usage: ${mem_mb:-unknown}"

    # utime/stime are fields 14 and 15 of /proc/<pid>/stat, in clock ticks.
    # The comm field (2) may contain spaces, so drop everything through the
    # last ") " first; the two counters then sit at positions 12 and 13.
    clk_tck=$(getconf CLK_TCK 2>/dev/null) || clk_tck=""
    if ! [[ "$clk_tck" =~ ^[1-9][0-9]*$ ]]; then
      clk_tck=100
    fi
    cpu_time=$(awk -v hz="$clk_tck" '
      { sub(/^.*\) /, ""); printf "%.2f seconds", ($12 + $13) / hz }' \
      "/proc/${pid}/stat" 2>/dev/null) || true
    health_log "INFO" " CPU time: ${cpu_time:-unknown}"
  fi

  return 0
}

# System resource check
#
# Logs memory, root-filesystem and load-average figures, and emits a WARN
# when memory use exceeds 90% or disk use exceeds 80%. Purely informational:
# it never marks the health check as failed. Each metric is skipped when its
# tool (free, df, uptime) is not installed.
check_resources() {
  health_log "INFO" "=== System Resources ==="

  # Memory
  if command -v free &>/dev/null; then
    local mem_total="" mem_used="" mem_percent
    # One `free` call so both figures come from the same snapshot.
    read -r mem_total mem_used \
      < <(free -m 2>/dev/null | awk '/Mem:/ {print $2, $3; exit}') || true

    # Validate before doing arithmetic: an empty or zero total would raise
    # a shell arithmetic error (division by zero).
    if [[ "$mem_total" =~ ^[0-9]+$ && "$mem_used" =~ ^[0-9]+$ ]] \
      && ((mem_total > 0)); then
      mem_percent=$(((mem_used * 100) / mem_total))
      health_log "INFO" "Memory: ${mem_used}MB / ${mem_total}MB (${mem_percent}%)"

      if ((mem_percent > 90)); then
        health_log "WARN" "⚠ High memory usage: ${mem_percent}%"
      fi
    else
      health_log "WARN" "Unable to determine memory usage"
    fi
  fi

  # Disk
  if command -v df &>/dev/null; then
    local disk_usage
    # -P forces the one-line-per-filesystem POSIX layout so the capacity
    # column is always field 5, even with long device names.
    disk_usage=$(df -P / 2>/dev/null \
      | awk 'NR > 1 { gsub(/%/, "", $5); pct = $5 } END { print pct }') || true

    if [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
      health_log "INFO" "Disk: ${disk_usage}% used"

      if ((disk_usage > 80)); then
        health_log "WARN" "⚠ High disk usage: ${disk_usage}%"
      fi
    else
      health_log "WARN" "Unable to determine disk usage"
    fi
  fi

  # Load average
  if command -v uptime &>/dev/null; then
    local load_avg
    load_avg=$(uptime 2>/dev/null \
      | awk -F'load average:' '{ gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2 }') || true
    health_log "INFO" "Load average: ${load_avg}"
  fi
}

# Main health check sequence
#
# Logs system resources, then probes the chat service on port 4500 (listener,
# HTTP endpoint, owning process). Any failed chat-service probe fails the
# health check. OpenSMTPD ports and process are inspected only when an smtpd
# binary is found, and only ever produce warnings.
#
# Returns:
#   0 when every chat-service probe passed, 1 otherwise.
main() {
  local exit_code=0

  health_log "INFO" "========== HEALTH CHECK START =========="
  health_log "INFO" "Timestamp: $(date '+%Y-%m-%d %H:%M:%S %Z')"

  # Check system resources
  check_resources

  # Check chat service (port 4500)
  health_log "INFO" "=== Chat Service (port 4500) ==="
  if ! check_port 4500 "chat service"; then
    exit_code=1
  fi

  if ! check_http "http://localhost:4500/api/health" "chat service" 3; then
    exit_code=1
  fi

  if ! check_process "chat service" 4500; then
    exit_code=1
  fi

  # Check OpenSMTPD service (ports 25, 587, 465)
  health_log "INFO" "=== OpenSMTPD Service ==="
  # Declared local so they do not leak into the global scope.
  local OPENSMTPD_CUSTOM_BINARY="/workspace/src/backend/app/opensmtpd/install/sbin/smtpd"
  local OPENSMTPD_SYSTEM_BINARY="/usr/sbin/smtpd"

  # Determine which binary to use (prefer custom, fallback to system, then PATH)
  local OPENSMTPD_BINARY=""
  if [[ -x "$OPENSMTPD_CUSTOM_BINARY" ]]; then
    OPENSMTPD_BINARY="$OPENSMTPD_CUSTOM_BINARY"
  elif [[ -x "$OPENSMTPD_SYSTEM_BINARY" ]]; then
    OPENSMTPD_BINARY="$OPENSMTPD_SYSTEM_BINARY"
  elif command -v smtpd &>/dev/null; then
    OPENSMTPD_BINARY="$(command -v smtpd)"
  fi

  if [[ -n "$OPENSMTPD_BINARY" ]]; then
    # SMTP findings are advisory: none of them touches exit_code.

    # Check SMTP port 25
    if check_port 25 "SMTP"; then
      health_log "INFO" "✓ SMTP port 25 is listening"
    else
      health_log "WARN" "⚠ SMTP port 25 is not listening (may be normal if not using inbound email)"
    fi

    # Check SMTP submission port 587
    if check_port 587 "SMTP Submission"; then
      health_log "INFO" "✓ SMTP submission port 587 is listening"
    else
      health_log "WARN" "⚠ SMTP submission port 587 is not listening"
    fi

    # Check SMTPS port 465
    if check_port 465 "SMTPS"; then
      health_log "INFO" "✓ SMTPS port 465 is listening"
    else
      health_log "WARN" "⚠ SMTPS port 465 is not listening"
    fi

    # Check OpenSMTPD process.
    # -x matches the process name exactly; -f "smtpd" would also match any
    # unrelated command line that merely contains the substring "smtpd".
    local smtpd_pid
    smtpd_pid=$(pgrep -x smtpd 2>/dev/null | head -n 1) || true
    if [[ -n "$smtpd_pid" ]]; then
      health_log "INFO" "✓ OpenSMTPD process running (PID: ${smtpd_pid})"
    else
      health_log "WARN" "⚠ OpenSMTPD process not found (may be disabled)"
    fi
  else
    health_log "INFO" "OpenSMTPD not installed (checked: $OPENSMTPD_CUSTOM_BINARY, $OPENSMTPD_SYSTEM_BINARY, PATH), skipping SMTP checks"
  fi

  # ttyd service has been removed, no longer checking port 4501

  health_log "INFO" "========== HEALTH CHECK END ==========="

  if ((exit_code == 0)); then
    health_log "INFO" "✓ Health check PASSED"
  else
    health_log "ERROR" "✗ Health check FAILED"
  fi

  return "$exit_code"
}

# Script entry point: run the full check sequence, forwarding any
# command-line arguments. As the last command, main's return status
# (0 = passed, 1 = failed) becomes the script's exit status.
main "$@"