#!/bin/bash
# Enhanced health check script for Shopify AI App Builder container
# Checks chat service (port 4500)
# Provides detailed diagnostics for debugging
#
# Exit status: 0 when every chat-service check passes, 1 otherwise.
# The OpenSMTPD checks only produce warnings and never fail the health check.

# NOTE: pipefail is intentionally not enabled. Several pipelines below end in
# `head`, which may close the pipe early and make an upstream stage exit
# non-zero even though the pipeline did what was intended.
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Diagnostic log location
DIAG_LOG_DIR="/var/log/shopify-ai"
DIAG_LOG_FILE="${DIAG_LOG_DIR}/healthcheck.log"

# Best-effort: an unwritable log directory must not, by itself, mark the
# container unhealthy (under `set -e` a failing mkdir would abort the script).
mkdir -p "$DIAG_LOG_DIR" 2>/dev/null || true

#######################################
# Health check logging.
# Appends "[timestamp] [level] message" to DIAG_LOG_FILE and prints the bare
# message to stdout (stdout is what the Docker health check captures).
# Globals:   DIAG_LOG_FILE (read)
# Arguments: $1 - level (e.g. INFO, WARN, ERROR); remaining args - message
# Returns:   0 (file logging failures are ignored)
#######################################
health_log() {
  local level="$1"
  shift
  local message="$*"
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S') || timestamp="unknown"

  # Log to file. Wrapped in a group so that a failing redirection (missing
  # directory, read-only filesystem) is silenced and does not trip `set -e`.
  {
    printf '%s\n' "[${timestamp}] [${level}] ${message}" >> "$DIAG_LOG_FILE"
  } 2>/dev/null || true

  # Log to stdout for Docker health check
  printf '%s\n' "${message}"
}

#######################################
# Port checking function.
# Looks for ":<port> " in the listening-socket table reported by ss, or by
# netstat when ss is not installed.
# Arguments: $1 - port number, $2 - human-readable service name
# Returns:   0 if a matching socket is listed; 1 if not, or if neither ss
#            nor netstat is available
#######################################
check_port() {
  local port="$1"
  local service="$2"
  local listing=""

  health_log "INFO" "Checking ${service} on port ${port}..."

  # Prefer ss (modern alternative to netstat), fall back to netstat.
  if command -v ss &>/dev/null; then
    listing=$(ss -tuln 2>/dev/null) || true
  elif command -v netstat &>/dev/null; then
    listing=$(netstat -tuln 2>/dev/null) || true
  else
    health_log "WARN" "Neither ss nor netstat available for port checking"
    return 1
  fi

  if grep -q ":${port} " <<<"$listing"; then
    health_log "INFO" "✓ ${service} is listening on port ${port}"
    return 0
  fi

  health_log "ERROR" "✗ ${service} is NOT listening on port ${port}"
  return 1
}

#######################################
# HTTP endpoint checking function.
# Issues a GET with curl and accepts exactly HTTP 200 or 302.
# Arguments: $1 - URL, $2 - service name, $3 - timeout in seconds (default 3)
# Returns:   0 on HTTP 200/302; 1 on any other result or if curl is missing
#######################################
check_http() {
  local url="$1"
  local service="$2"
  local timeout="${3:-3}"
  local http_code=""

  health_log "INFO" "Checking ${service} HTTP endpoint: ${url}"

  if ! command -v curl &>/dev/null; then
    health_log "WARN" "curl command not available for HTTP check"
    return 1
  fi

  # curl enforces the time limit itself via --max-time, so the check no longer
  # fails merely because the external `timeout` utility is absent. When
  # `timeout` is present it is kept as an outer guard.
  local -a cmd=(curl -s -o /dev/null --max-time "${timeout}" -w '%{http_code}' "${url}")
  if command -v timeout &>/dev/null; then
    cmd=(timeout "${timeout}" "${cmd[@]}")
  fi

  # On failure curl still prints a status via -w; the assignment keeps it.
  http_code=$("${cmd[@]}" 2>/dev/null) || true

  case "$http_code" in
    200 | 302)
      health_log "INFO" "✓ ${service} HTTP endpoint responding (HTTP 200/302)"
      return 0
      ;;
    *)
      health_log "ERROR" "✗ ${service} HTTP endpoint NOT responding (timeout: ${timeout}s)"
      return 1
      ;;
  esac
}

#######################################
# Process checking function.
# Resolves the PID of the process bound to a port and logs its resident
# memory and accumulated CPU time from /proc.
# Arguments: $1 - service name, $2 - port number
# Returns:   0 if a PID was found, 1 otherwise
#######################################
check_process() {
  local service="$1"
  local port="$2"
  local pid=""

  health_log "INFO" "Checking ${service} process..."

  # Find process listening on port. `ss -p` reports the owner as
  # users:(("name",pid=N,fd=M)); take the first pid=N on a matching line.
  if command -v ss &>/dev/null; then
    pid=$(ss -tulnp 2>/dev/null \
      | grep ":${port} " \
      | grep -oE 'pid=[0-9]+' \
      | head -n 1) || true
    pid="${pid#pid=}"
  fi

  # ss may show no owner (e.g. insufficient privileges); try lsof as well.
  if [ -z "$pid" ] && command -v lsof &>/dev/null; then
    pid=$(lsof -ti ":${port}" 2>/dev/null | head -n 1) || true
  fi

  if [ -z "$pid" ]; then
    health_log "ERROR" "✗ ${service} process NOT found"
    return 1
  fi

  health_log "INFO" "✓ ${service} process running (PID: ${pid})"

  # Check process memory usage
  if [ -f "/proc/${pid}/status" ]; then
    local mem_mb
    mem_mb=$(awk '/VmRSS/ {printf "%.2f MB", $2/1024}' "/proc/${pid}/status" 2>/dev/null) || true
    health_log "INFO" "  Memory usage: ${mem_mb}"
  fi

  # CPU time lives in /proc/<pid>/stat (fields 14 utime and 15 stime, in clock
  # ticks), not in /proc/<pid>/status.
  if [ -r "/proc/${pid}/stat" ]; then
    local clk_tck cpu_time
    clk_tck=$(getconf CLK_TCK 2>/dev/null) || true
    if ! [[ "$clk_tck" =~ ^[1-9][0-9]*$ ]]; then
      clk_tck=100
    fi
    # Strip "pid (comm) " first: comm may contain spaces, which would shift
    # the field numbers. After stripping, fields 14/15 become $12/$13.
    cpu_time=$(awk -v hz="$clk_tck" \
      '{ sub(/^.*\) /, ""); printf "%.2f seconds", ($12 + $13) / hz }' \
      "/proc/${pid}/stat" 2>/dev/null) || true
    health_log "INFO" "  CPU time: ${cpu_time}"
  fi

  return 0
}

#######################################
# System resource check.
# Logs memory, root-filesystem disk usage and load average; emits warnings
# above 90% memory or 80% disk. Purely informational.
# Returns:   0
#######################################
check_resources() {
  health_log "INFO" "=== System Resources ==="

  # Memory
  if command -v free &>/dev/null; then
    local mem_total="" mem_used="" mem_percent
    read -r mem_total mem_used < <(free -m 2>/dev/null | awk '/Mem:/ {print $2, $3}') || true

    # Validate before doing arithmetic: an empty or zero total would otherwise
    # be a fatal arithmetic error under `set -e`.
    if [[ "$mem_total" =~ ^[0-9]+$ && "$mem_used" =~ ^[0-9]+$ ]] && ((mem_total > 0)); then
      mem_percent=$(((mem_used * 100) / mem_total))
      health_log "INFO" "Memory: ${mem_used}MB / ${mem_total}MB (${mem_percent}%)"

      if ((mem_percent > 90)); then
        health_log "WARN" "⚠ High memory usage: ${mem_percent}%"
      fi
    else
      health_log "WARN" "Unable to determine memory usage"
    fi
  fi

  # Disk
  if command -v df &>/dev/null; then
    local disk_usage=""
    # -P keeps each filesystem on a single line even for long device names.
    disk_usage=$(df -P / 2>/dev/null | awk 'NR == 2 {sub(/%/, "", $5); print $5}') || true

    if [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
      health_log "INFO" "Disk: ${disk_usage}% used"

      if ((disk_usage > 80)); then
        health_log "WARN" "⚠ High disk usage: ${disk_usage}%"
      fi
    else
      health_log "WARN" "Unable to determine disk usage"
    fi
  fi

  # Load average
  if command -v uptime &>/dev/null; then
    local load_avg=""
    load_avg=$(uptime | awk -F'load average:' '{print $2}' | xargs) || true
    health_log "INFO" "Load average: ${load_avg}"
  fi
}

#######################################
# Main health check sequence.
# Runs the resource report, the three chat-service checks (port, HTTP,
# process) and, when an smtpd binary is found, the OpenSMTPD checks.
# Globals:   REPO_DIR (read; defaults to /home/web/data)
# Returns:   0 if all chat-service checks passed, 1 otherwise
#######################################
main() {
  local exit_code=0

  health_log "INFO" "========== HEALTH CHECK START =========="
  health_log "INFO" "Timestamp: $(date '+%Y-%m-%d %H:%M:%S %Z')"

  # Check system resources
  check_resources

  # Check chat service (port 4500)
  health_log "INFO" "=== Chat Service (port 4500) ==="
  if ! check_port 4500 "chat service"; then
    exit_code=1
  fi

  if ! check_http "http://localhost:4500/api/health" "chat service" 3; then
    exit_code=1
  fi

  if ! check_process "chat service" 4500; then
    exit_code=1
  fi

  # Check OpenSMTPD service (ports 25, 587, 465)
  health_log "INFO" "=== OpenSMTPD Service ==="
  local repo_dir="${REPO_DIR:-/home/web/data}"
  local opensmtpd_install_root="/workspace/src/backend/app/opensmtpd/install"
  if [ -d "${repo_dir}/backend/app/opensmtpd" ]; then
    opensmtpd_install_root="${repo_dir}/backend/app/opensmtpd/install"
  fi
  local opensmtpd_custom_binary="${opensmtpd_install_root}/sbin/smtpd"
  local opensmtpd_system_binary="/usr/sbin/smtpd"

  # Determine which binary to use (prefer custom, fallback to system)
  local opensmtpd_binary=""
  if [ -x "$opensmtpd_custom_binary" ]; then
    opensmtpd_binary="$opensmtpd_custom_binary"
  elif [ -x "$opensmtpd_system_binary" ]; then
    opensmtpd_binary="$opensmtpd_system_binary"
  elif command -v smtpd &>/dev/null; then
    opensmtpd_binary="$(command -v smtpd)"
  fi

  if [ -n "$opensmtpd_binary" ]; then
    # SMTP results are advisory: failures are logged as warnings and do not
    # change exit_code.

    # Check SMTP port 25
    if check_port 25 "SMTP"; then
      health_log "INFO" "✓ SMTP port 25 is listening"
    else
      health_log "WARN" "⚠ SMTP port 25 is not listening (may be normal if not using inbound email)"
    fi

    # Check SMTP submission port 587
    if check_port 587 "SMTP Submission"; then
      health_log "INFO" "✓ SMTP submission port 587 is listening"
    else
      health_log "WARN" "⚠ SMTP submission port 587 is not listening"
    fi

    # Check SMTPS port 465
    if check_port 465 "SMTPS"; then
      health_log "INFO" "✓ SMTPS port 465 is listening"
    else
      health_log "WARN" "⚠ SMTPS port 465 is not listening"
    fi

    # Check OpenSMTPD process (single pgrep call; first match is reported)
    local smtpd_pid=""
    smtpd_pid=$(pgrep -f "smtpd" 2>/dev/null | head -n 1) || true
    if [ -n "$smtpd_pid" ]; then
      health_log "INFO" "✓ OpenSMTPD process running (PID: ${smtpd_pid})"
    else
      health_log "WARN" "⚠ OpenSMTPD process not found (may be disabled)"
    fi
  else
    health_log "INFO" "OpenSMTPD not installed (checked: $opensmtpd_custom_binary, $opensmtpd_system_binary, PATH), skipping SMTP checks"
  fi

  # ttyd service has been removed, no longer checking port 4501

  health_log "INFO" "========== HEALTH CHECK END ==========="

  if [ "$exit_code" -eq 0 ]; then
    health_log "INFO" "✓ Health check PASSED"
  else
    health_log "ERROR" "✗ Health check FAILED"
  fi

  return "$exit_code"
}

# Run main function
main "$@"