Files
shopify-ai-backup/scripts/healthcheck.sh
2026-02-09 12:28:30 +00:00

226 lines
6.7 KiB
Bash

#!/bin/bash
# Enhanced health check script for Shopify AI App Builder container.
#
# Checks the chat service (port 4500) and, when the OpenSMTPD binary is
# present, reports on the SMTP ports as well.
# Provides detailed diagnostics for debugging.
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Diagnostic log location
DIAG_LOG_DIR="/var/log/shopify-ai"
DIAG_LOG_FILE="${DIAG_LOG_DIR}/healthcheck.log"

# The diagnostic log is best-effort: with `set -e` active, a failing mkdir
# (non-root user, read-only filesystem, ...) would otherwise abort the whole
# health check before a single service has been probed. When the log cannot
# be created or appended to, discard file output instead.
if ! mkdir -p "$DIAG_LOG_DIR" 2>/dev/null ||
  ! { : >>"$DIAG_LOG_FILE"; } 2>/dev/null; then
  DIAG_LOG_FILE="/dev/null"
fi
#######################################
# Health check logging.
# Appends a timestamped, levelled record to the diagnostic log file and
# echoes the bare message on stdout (for the Docker health check output).
# Globals:   DIAG_LOG_FILE (read)
# Arguments: $1 - log level (e.g. INFO, WARN, ERROR)
#            $2.. - message words, joined with spaces
# Outputs:   the message on stdout; "[timestamp] [level] message" in the log
# Returns:   status of the final stdout write
#######################################
health_log() {
  local level="$1"
  shift
  local message="$*"
  local timestamp
  # Declared separately so a failing `date` is not masked by `local`.
  timestamp=$(date '+%Y-%m-%d %H:%M:%S') || timestamp="unknown-time"

  # File logging is intentionally best-effort: an unwritable log must not
  # make the script exit under `set -e`. The brace group keeps the shell's
  # own "cannot open" redirection error off stderr.
  {
    printf '[%s] [%s] %s\n' "$timestamp" "$level" "$message" \
      >>"${DIAG_LOG_FILE:-/dev/null}"
  } 2>/dev/null || true

  # printf instead of echo so messages such as "-n" are printed verbatim.
  printf '%s\n' "$message"
}
#######################################
# Port checking function.
# Reports whether something is listening on the given port, using ss when
# it is available and netstat otherwise.
# Arguments: $1 - port number
#            $2 - human-readable service name used in log messages
# Outputs:   progress and result messages via health_log
# Returns:   0 when the port is listening; 1 when it is not, or when
#            neither ss nor netstat can be found
#######################################
check_port() {
  local port="$1"
  local service="$2"
  local lister=""

  health_log "INFO" "Checking ${service} on port ${port}..."

  # Prefer ss (modern alternative to netstat); fall back to netstat.
  if command -v ss &>/dev/null; then
    lister="ss"
  elif command -v netstat &>/dev/null; then
    lister="netstat"
  fi

  if [[ -z "$lister" ]]; then
    health_log "WARN" "Neither ss nor netstat available for port checking"
    return 1
  fi

  # Both tools take the same flags; the trailing space in the pattern keeps
  # e.g. ":45001" from matching a search for ":4500".
  if ! "$lister" -tuln 2>/dev/null | grep -q ":${port} "; then
    health_log "ERROR" "${service} is NOT listening on port ${port}"
    return 1
  fi

  health_log "INFO" "${service} is listening on port ${port}"
}
#######################################
# HTTP endpoint checking function.
# Requests the URL with curl and treats HTTP 200 or 302 as healthy.
# The time limit is enforced by curl itself (--connect-timeout/--max-time),
# so the check no longer depends on the external `timeout` utility.
# Arguments: $1 - URL to request
#            $2 - human-readable service name used in log messages
#            $3 - time limit in seconds (optional, default 3)
# Outputs:   progress and result messages via health_log
# Returns:   0 when the endpoint answers 200 or 302; 1 otherwise, including
#            when curl is not installed
#######################################
check_http() {
  local url="$1"
  local service="$2"
  local timeout="${3:-3}"
  local http_code=""

  health_log "INFO" "Checking ${service} HTTP endpoint: ${url}"

  if ! command -v curl &>/dev/null; then
    health_log "WARN" "curl command not available for HTTP check"
    return 1
  fi

  # curl prints the status code (000 when no response was received) and
  # exits non-zero on connection errors/timeouts; that exit status is
  # deliberately ignored here because the decision is made on the code.
  http_code=$(curl -s -o /dev/null -w '%{http_code}' \
    --connect-timeout "${timeout}" --max-time "${timeout}" \
    "${url}" 2>/dev/null) || true

  # Exact match on the status code rather than a substring search.
  case "${http_code}" in
    200 | 302)
      health_log "INFO" "${service} HTTP endpoint responding (HTTP 200/302)"
      return 0
      ;;
    *)
      health_log "ERROR" "${service} HTTP endpoint NOT responding (timeout: ${timeout}s, HTTP status: ${http_code:-none})"
      return 1
      ;;
  esac
}
#######################################
# Process checking function.
# Finds the process that owns the listening socket on the given port and
# logs its PID, resident memory and accumulated CPU time.
# Arguments: $1 - human-readable service name used in log messages
#            $2 - port number the service listens on
# Outputs:   progress and result messages via health_log
# Returns:   0 when an owning PID was found, 1 otherwise
#######################################
check_process() {
  local service="$1"
  local port="$2"
  local pid=""

  health_log "INFO" "Checking ${service} process..."

  # ss prints the owner as users:(("name",pid=123,fd=4)); pull out the
  # number after "pid=" and keep only the first match, since the same
  # service may be bound on both IPv4 and IPv6.
  if command -v ss &>/dev/null; then
    pid=$(ss -tulnp 2>/dev/null \
      | grep ":${port} " \
      | grep -o 'pid=[0-9][0-9]*' \
      | head -n 1 \
      | cut -d'=' -f2) || true
  fi

  # ss omits process details without sufficient privileges, so also try
  # lsof when ss produced nothing.
  if [[ -z "$pid" ]] && command -v lsof &>/dev/null; then
    pid=$(lsof -ti ":${port}" 2>/dev/null | head -n 1) || true
  fi

  if [[ -z "$pid" ]]; then
    health_log "ERROR" "${service} process NOT found"
    return 1
  fi

  health_log "INFO" "${service} process running (PID: ${pid})"

  # Resident memory: VmRSS in /proc/PID/status is reported in kB.
  if [[ -r "/proc/${pid}/status" ]]; then
    local mem_mb
    mem_mb=$(awk '/^VmRSS:/ {printf "%.2f MB", $2/1024}' \
      "/proc/${pid}/status" 2>/dev/null) || true
    health_log "INFO" " Memory usage: ${mem_mb}"
  fi

  # CPU time: utime/stime are fields 14 and 15 of /proc/PID/stat, in clock
  # ticks. Everything up to the last ") " is stripped first because the
  # command name (field 2) may itself contain spaces; after stripping, the
  # two counters are fields 12 and 13.
  if [[ -r "/proc/${pid}/stat" ]]; then
    local ticks_per_sec cpu_time
    ticks_per_sec=$(getconf CLK_TCK 2>/dev/null) || ticks_per_sec=100
    cpu_time=$(awk -v hz="${ticks_per_sec:-100}" \
      '{ sub(/^.*\) /, ""); printf "%.2f seconds", ($12 + $13) / hz }' \
      "/proc/${pid}/stat" 2>/dev/null) || true
    health_log "INFO" " CPU time: ${cpu_time}"
  fi

  return 0
}
#######################################
# System resource check.
# Logs memory, root-filesystem disk usage and load average, and emits a
# warning above 90% memory or 80% disk usage. Purely informational: a tool
# that is missing or prints something unparsable produces a warning (or is
# skipped) instead of aborting the health check.
# Arguments: none
# Outputs:   messages via health_log
# Returns:   0
#######################################
check_resources() {
  health_log "INFO" "=== System Resources ==="

  # Memory. LC_ALL=C pins the "Mem:" row label; the values are validated
  # before the division so an empty or zero total cannot raise an
  # arithmetic error.
  if command -v free &>/dev/null; then
    local mem_total="" mem_used="" mem_percent
    read -r mem_total mem_used < <(
      LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/ {print $2, $3; exit}'
    ) || true
    if [[ "$mem_total" =~ ^[0-9]+$ && "$mem_used" =~ ^[0-9]+$ ]] \
      && ((mem_total > 0)); then
      mem_percent=$(((mem_used * 100) / mem_total))
      health_log "INFO" "Memory: ${mem_used}MB / ${mem_total}MB (${mem_percent}%)"
      if ((mem_percent > 90)); then
        health_log "WARN" "⚠ High memory usage: ${mem_percent}%"
      fi
    else
      health_log "WARN" "Unable to determine memory usage"
    fi
  fi

  # Disk. -P selects the POSIX output format, which keeps each filesystem
  # on a single line even when the device name is long.
  if command -v df &>/dev/null; then
    local disk_usage=""
    disk_usage=$(LC_ALL=C df -P / 2>/dev/null \
      | awk 'NR == 2 {gsub(/%/, "", $5); print $5}') || true
    if [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
      health_log "INFO" "Disk: ${disk_usage}% used"
      if ((disk_usage > 80)); then
        health_log "WARN" "⚠ High disk usage: ${disk_usage}%"
      fi
    else
      health_log "WARN" "Unable to determine disk usage"
    fi
  fi

  # Load average: everything after the "load average:" label, trimmed.
  if command -v uptime &>/dev/null; then
    local load_avg=""
    load_avg=$(LC_ALL=C uptime 2>/dev/null \
      | awk -F'load average:' '{gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2}') || true
    health_log "INFO" "Load average: ${load_avg}"
  fi

  return 0
}
#######################################
# Main health check sequence.
# Logs system resources, probes the chat service on port 4500 (port, HTTP
# endpoint, process) and, when the OpenSMTPD binary is executable, reports
# on the SMTP ports and the smtpd process. Only the chat service checks
# influence the result; the SMTP findings are logged as info/warnings.
# Globals:   OPENSMTPD_BINARY (written)
# Arguments: none used
# Outputs:   messages via health_log
# Returns:   0 when all chat service checks pass, 1 otherwise
#######################################
main() {
  local status=0
  local idx
  local smtpd_pid

  health_log "INFO" "========== HEALTH CHECK START =========="
  health_log "INFO" "Timestamp: $(date '+%Y-%m-%d %H:%M:%S %Z')"

  # System resources are informational only.
  check_resources

  # Chat service (port 4500): any failing probe marks the run as failed,
  # but the remaining probes still run so the log stays complete.
  health_log "INFO" "=== Chat Service (port 4500) ==="
  check_port 4500 "chat service" || status=1
  check_http "http://localhost:4500/api/health" "chat service" 3 || status=1
  check_process "chat service" 4500 || status=1

  # OpenSMTPD service (ports 25, 587, 465)
  health_log "INFO" "=== OpenSMTPD Service ==="
  OPENSMTPD_BINARY="/workspace/src/backend/app/opensmtpd/install/sbin/smtpd"
  if [[ ! -x "$OPENSMTPD_BINARY" ]]; then
    health_log "INFO" "OpenSMTPD not installed at expected location, skipping SMTP checks"
  else
    # One row per SMTP port: port number, service name handed to check_port,
    # label used in the summary line, and an optional hint appended to the
    # "not listening" warning.
    local -a smtp_ports=(25 587 465)
    local -a smtp_services=("SMTP" "SMTP Submission" "SMTPS")
    local -a smtp_labels=(
      "SMTP port 25"
      "SMTP submission port 587"
      "SMTPS port 465"
    )
    local -a smtp_hints=(
      " (may be normal if not using inbound email)"
      ""
      ""
    )
    for idx in "${!smtp_ports[@]}"; do
      if check_port "${smtp_ports[idx]}" "${smtp_services[idx]}"; then
        health_log "INFO" "✓ ${smtp_labels[idx]} is listening"
      else
        health_log "WARN" "⚠ ${smtp_labels[idx]} is not listening${smtp_hints[idx]}"
      fi
    done

    # OpenSMTPD process
    if pgrep -f "smtpd" >/dev/null; then
      smtpd_pid=$(pgrep -f "smtpd" | head -1)
      health_log "INFO" "✓ OpenSMTPD process running (PID: ${smtpd_pid})"
    else
      health_log "WARN" "⚠ OpenSMTPD process not found (may be disabled)"
    fi
  fi

  # ttyd service has been removed, no longer checking port 4501
  health_log "INFO" "========== HEALTH CHECK END ==========="
  if ((status != 0)); then
    health_log "ERROR" "✗ Health check FAILED"
  else
    health_log "INFO" "✓ Health check PASSED"
  fi
  return "$status"
}
# Entry point: run main, forwarding the script's command-line arguments.
# main returns 0 when the health check passed and 1 when it failed; with
# `set -e` in effect a non-zero return here ends the script with that status.
main "$@"