Restore to commit 74e578279624c6045ca440a3459ebfa1f8d54191

This commit is contained in:
southseact-3d
2026-02-07 20:32:41 +00:00
commit ed67b7741b
252 changed files with 99814 additions and 0 deletions

206
scripts/healthcheck.sh Normal file
View File

@@ -0,0 +1,206 @@
#!/bin/bash
# Enhanced health check script for Shopify AI App Builder container
# Checks both ttyd (port 4001) and chat service (port 4000)
# Provides detailed diagnostics for debugging
#
# Exit status: 0 when every check passes, non-zero otherwise.
set -e

# Colors for output (ANSI escape sequences; NC resets the colour)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Diagnostic log location
DIAG_LOG_DIR="/var/log/shopify-ai"
DIAG_LOG_FILE="${DIAG_LOG_DIR}/healthcheck.log"

# File logging is best-effort. With `set -e` active, a log directory that
# cannot be created or written (non-root user, read-only filesystem) would
# otherwise abort the script and report the container as unhealthy for a
# reason unrelated to the services. In that case send file output to /dev/null.
if ! { mkdir -p "$DIAG_LOG_DIR" && true >> "$DIAG_LOG_FILE"; } 2>/dev/null; then
  DIAG_LOG_FILE="/dev/null"
fi
# Health check logging
#
# Appends a timestamped "[time] [LEVEL] message" record to DIAG_LOG_FILE and
# prints the bare message on stdout (stdout is what the Docker health check
# captures).
#
# Globals:   DIAG_LOG_FILE (read)
# Arguments: $1   - severity label, e.g. INFO / WARN / ERROR
#            $2.. - message words, joined with single spaces
# Outputs:   the message on stdout
# Returns:   status of the final stdout write (0 in practice)
health_log() {
  local level="$1"
  shift
  local message="$*"
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S') || timestamp="unknown-time"

  # Log to file. This is best-effort: the script runs under `set -e`, so a
  # failed append (missing directory, read-only filesystem, full disk) must
  # not abort the health check. stderr is redirected first so the shell's
  # redirection error is silenced as well.
  printf '[%s] [%s] %s\n' "$timestamp" "$level" "$message" \
    2>/dev/null >> "$DIAG_LOG_FILE" || true

  # Log to stdout for Docker health check. printf is used instead of echo so
  # a message such as "-n" is printed literally.
  printf '%s\n' "$message"
}
# Port checking function
#
# Reports whether some socket is listening on the given port, using the
# listing produced by `ss -tuln` or, when ss is not installed, `netstat -tuln`.
#
# Arguments: $1 - port number to look for
#            $2 - human-readable service name used in log messages
# Outputs:   progress and result messages via health_log
# Returns:   0 when the port appears in the listing; 1 when it does not or
#            when neither ss nor netstat is available
check_port() {
  local port="$1"
  local service="$2"
  local lister=""

  health_log "INFO" "Checking ${service} on port ${port}..."

  # Pick the socket-listing tool: ss (modern alternative to netstat) first,
  # netstat as the fallback.
  if command -v ss &>/dev/null; then
    lister="ss"
  elif command -v netstat &>/dev/null; then
    lister="netstat"
  fi

  if [ -z "$lister" ]; then
    health_log "WARN" "Neither ss nor netstat available for port checking"
    return 1
  fi

  # The trailing space in the pattern keeps ":400" from matching ":4000".
  if ! "$lister" -tuln 2>/dev/null | grep -q ":${port} "; then
    health_log "ERROR" "${service} is NOT listening on port ${port}"
    return 1
  fi

  health_log "INFO" "${service} is listening on port ${port}"
}
# HTTP endpoint checking function
#
# Requests the URL with curl and treats HTTP 200 or 302 as healthy.
#
# The request is bounded by the external `timeout` command when it exists.
# When it does not, curl's own --max-time is used instead, so a missing
# `timeout` binary no longer fails every HTTP check. The status code is
# compared exactly rather than searched for as a substring of curl's output.
#
# Arguments: $1 - URL to request
#            $2 - human-readable service name used in log messages
#            $3 - time limit in seconds (default: 3)
# Outputs:   progress and result messages via health_log
# Returns:   0 when the endpoint answers 200 or 302; 1 otherwise (including
#            when curl is not installed)
check_http() {
  local url="$1"
  local service="$2"
  local timeout="${3:-3}"
  local http_code=""

  health_log "INFO" "Checking ${service} HTTP endpoint: ${url}"

  if ! command -v curl &>/dev/null; then
    health_log "ERROR" "curl command not available for HTTP check"
    return 1
  fi

  # `|| true`: a refused connection or an expired time limit is an expected
  # outcome handled by the status-code comparison below, not a script error.
  if command -v timeout &>/dev/null; then
    http_code=$(timeout "${timeout}" \
      curl -s -o /dev/null -w '%{http_code}' "${url}" 2>/dev/null) || true
  else
    http_code=$(curl -s -o /dev/null --max-time "${timeout}" \
      -w '%{http_code}' "${url}" 2>/dev/null) || true
  fi

  case "${http_code}" in
    200 | 302)
      health_log "INFO" "${service} HTTP endpoint responding (HTTP 200/302)"
      return 0
      ;;
  esac

  health_log "ERROR" "${service} HTTP endpoint NOT responding (timeout: ${timeout}s)"
  return 1
}
# Process checking function
#
# Finds the process that owns the listening socket on a port and logs its
# PID, resident memory and accumulated CPU time.
#
# The PID comes from the "pid=N" token that `ss -p` prints in its process
# column. Only the first matching line is used, so a service bound on both
# IPv4 and IPv6 still yields a single PID. If ss is missing or shows no
# process information, lsof is tried as a fallback.
#
# Arguments: $1 - human-readable service name used in log messages
#            $2 - port the service listens on
# Outputs:   progress and result messages via health_log
# Returns:   0 when a PID was found, 1 otherwise
check_process() {
  local service="$1"
  local port="$2"
  local pid=""
  local mem_mb=""
  local cpu_time=""
  local clk_tck=""

  health_log "INFO" "Checking ${service} process..."

  # Find process listening on port
  if command -v ss &>/dev/null; then
    pid=$(ss -tulnp 2>/dev/null | awk -v needle=":${port} " '
      !found && index($0, needle) && match($0, /pid=[0-9]+/) {
        print substr($0, RSTART + 4, RLENGTH - 4)
        found = 1
      }') || pid=""
  fi
  if [ -z "$pid" ] && command -v lsof &>/dev/null; then
    # -sTCP:LISTEN restricts TCP matches to the listener, not connected clients.
    pid=$(lsof -ti ":${port}" -sTCP:LISTEN 2>/dev/null | head -n 1) || pid=""
  fi

  if [ -z "$pid" ]; then
    health_log "ERROR" "${service} process NOT found"
    return 1
  fi

  health_log "INFO" "${service} process running (PID: ${pid})"

  # Check process memory usage (VmRSS is reported in kB)
  if [ -f "/proc/${pid}/status" ]; then
    mem_mb=$(awk '/^VmRSS:/ {printf "%.2f MB", $2/1024}' \
      "/proc/${pid}/status" 2>/dev/null) || mem_mb=""
    health_log "INFO" " Memory usage: ${mem_mb}"

    # utime/stime live in /proc/PID/stat (fields 14 and 15, in clock ticks),
    # not in /proc/PID/status. Everything up to the last ") " is dropped first
    # because the command name in field 2 may contain spaces; after that,
    # field N of the file is awk field N-2.
    clk_tck=$(getconf CLK_TCK 2>/dev/null) || clk_tck=100
    [[ "$clk_tck" =~ ^[1-9][0-9]*$ ]] || clk_tck=100
    cpu_time=$(awk -v hz="$clk_tck" '{
        sub(/^.*\) /, "")
        printf "%.2f seconds", ($12 + $13) / hz
      }' "/proc/${pid}/stat" 2>/dev/null) || cpu_time=""
    health_log "INFO" " CPU time: ${cpu_time}"
  fi

  return 0
}
# System resource check
#
# Logs memory, root-filesystem and load-average figures and warns when
# memory use exceeds 90% or disk use exceeds 80%. This section is purely
# informational: values that cannot be parsed are reported with a WARN and
# never cause an arithmetic error or a non-zero return.
#
# Arguments: none
# Outputs:   messages via health_log
# Returns:   0 always
check_resources() {
  local mem_line=""
  local mem_total=""
  local mem_used=""
  local mem_percent=0
  local disk_usage=""
  local load_avg=""

  health_log "INFO" "=== System Resources ==="

  # Memory: one `free` call supplies both total and used (MiB).
  if command -v free &>/dev/null; then
    mem_line=$(free -m 2>/dev/null \
      | awk '/^Mem:/ && !seen++ {print $2, $3}') || mem_line=""
    read -r mem_total mem_used <<<"$mem_line" || true
    # Validate before dividing: an empty or zero total would otherwise raise
    # an arithmetic error and abort the health check.
    if [[ "$mem_total" =~ ^[0-9]+$ && "$mem_used" =~ ^[0-9]+$ ]] \
      && ((mem_total > 0)); then
      mem_percent=$(((mem_used * 100) / mem_total))
      health_log "INFO" "Memory: ${mem_used}MB / ${mem_total}MB (${mem_percent}%)"
      if ((mem_percent > 90)); then
        health_log "WARN" "⚠ High memory usage: ${mem_percent}%"
      fi
    else
      health_log "WARN" "Memory: unable to determine usage from free output"
    fi
  fi

  # Disk: -P forces one line per filesystem so column 5 is always capacity.
  if command -v df &>/dev/null; then
    disk_usage=$(df -P / 2>/dev/null \
      | awk 'NR > 1 { sub(/%$/, "", $5); last = $5 } END { print last }') \
      || disk_usage=""
    if [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
      health_log "INFO" "Disk: ${disk_usage}% used"
      if ((disk_usage > 80)); then
        health_log "WARN" "⚠ High disk usage: ${disk_usage}%"
      fi
    else
      health_log "WARN" "Disk: unable to determine usage from df output"
    fi
  fi

  # Load average: accepts both "load average:" and "load averages:" labels.
  if command -v uptime &>/dev/null; then
    load_avg=$(uptime 2>/dev/null | awk -F'load averages?:' '
      NF > 1 { gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2 }') || load_avg=""
    health_log "INFO" "Load average: ${load_avg}"
  fi

  return 0
}
# Main health check sequence
#
# Runs the resource report, then the port / HTTP / process checks for the
# chat service (port 4000) and the ttyd proxy (port 4001). Every check is
# always executed so the log shows the complete picture; any failing check
# makes the overall result a failure.
#
# Arguments: none are read ("$@" is accepted but unused)
# Outputs:   messages via health_log
# Returns:   0 when all service checks pass, 1 otherwise
main() {
  local exit_code=0
  local proxy_ok=1

  health_log "INFO" "========== HEALTH CHECK START =========="
  health_log "INFO" "Timestamp: $(date '+%Y-%m-%d %H:%M:%S %Z')"

  # Check system resources. This is informational only: a failure here must
  # not abort the script under `set -e` or affect the health verdict.
  check_resources || health_log "WARN" "System resource check failed (non-fatal)"

  # Check chat service (port 4000)
  health_log "INFO" "=== Chat Service (port 4000) ==="
  check_port 4000 "chat service" || exit_code=1
  check_http "http://localhost:4000/api/health" "chat service" 3 || exit_code=1
  check_process "chat service" 4000 || exit_code=1

  # Check ttyd service (port 4001) - proxy running, ttyd starts on-demand
  health_log "INFO" "=== TTYD Proxy Service (port 4001) ==="
  check_port 4001 "ttyd-proxy" || proxy_ok=0
  # Check if proxy responds ( ttyd may not be running yet - that's OK )
  check_http "http://localhost:4001/" "ttyd-proxy" 10 || proxy_ok=0
  # Check proxy process (not ttyd - ttyd starts on-demand)
  check_process "ttyd-proxy" 4001 || proxy_ok=0

  # Only claim the proxy is active when its checks actually passed;
  # previously this line was logged even after every proxy check had failed.
  if [ "$proxy_ok" -eq 1 ]; then
    health_log "INFO" "ttyd-proxy active (ttyd starts on-demand when visited)"
  else
    exit_code=1
  fi

  health_log "INFO" "========== HEALTH CHECK END ==========="
  if [ "$exit_code" -eq 0 ]; then
    health_log "INFO" "✓ Health check PASSED"
  else
    health_log "ERROR" "✗ Health check FAILED"
  fi

  return "$exit_code"
}
# Run main function
# Command-line arguments are forwarded unchanged (main does not read them).
# This is the script's final command, so main's return status
# (0 = all checks passed, 1 = at least one failed) is the script's exit status.
main "$@"