Files
shopify-ai-backup/scripts/diagnostic-logger.sh

225 lines
6.7 KiB
Bash

#!/bin/bash
# Diagnostic logging utility for Shopify AI App Builder container
# Provides comprehensive system and application diagnostics
#
# Environment:
#   DIAG_LOG_DIR  optional; directory for diagnostics.log
#                 (default: /var/log/shopify-ai)
set -e

# Colors for log output. The values are literal backslash escapes, so they only
# take effect when printed with `echo -e` or printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Diagnostic log location. A value already present in the environment wins so
# the location can be changed without editing this file.
DIAG_LOG_DIR="${DIAG_LOG_DIR:-/var/log/shopify-ai}"

# A diagnostics helper should not abort its caller just because the preferred
# directory cannot be created or written (e.g. running unprivileged): fall back
# to a private temporary directory and say so on stderr.
if ! mkdir -p "$DIAG_LOG_DIR" 2>/dev/null || [ ! -w "$DIAG_LOG_DIR" ]; then
  _diag_fallback_dir=$(mktemp -d "${TMPDIR:-/tmp}/shopify-ai-logs.XXXXXX")
  printf 'diagnostic-logger: %s is not usable; logging to %s instead\n' \
    "$DIAG_LOG_DIR" "$_diag_fallback_dir" >&2
  DIAG_LOG_DIR="$_diag_fallback_dir"
  unset _diag_fallback_dir
fi

DIAG_LOG_FILE="${DIAG_LOG_DIR}/diagnostics.log"
# Logging function with timestamps and levels
#
# Appends one record to the diagnostics log file and mirrors it to stderr.
#
# Globals:
#   DIAG_LOG_FILE                (read) file the record is appended to
#   RED, YELLOW, GREEN, BLUE, NC (read) color escapes; treated as empty if unset
# Arguments:
#   $1   level. ERROR, WARN, INFO and DEBUG are colorized on stderr; any other
#        level is still printed, without color.
#   $2.. message words, joined by the first character of IFS (a space by
#        default).
# Outputs:
#   file:   [timestamp] [LEVEL] [PID:n] message
#   stderr: [timestamp] [LEVEL] message
diag_log() {
  local level="${1-}"
  if [ "$#" -gt 0 ]; then
    shift
  fi
  local message="$*"
  local pid=$$
  local timestamp
  # %3N (milliseconds) is a GNU date extension.
  timestamp=$(date '+%Y-%m-%d %H:%M:%S.%3N') || timestamp="unknown-time"

  # Log to file
  printf '[%s] [%s] [PID:%s] %s\n' \
    "$timestamp" "$level" "$pid" "$message" >> "$DIAG_LOG_FILE"

  # Log to stderr with colors
  local color=""
  case "$level" in
    ERROR) color="${RED-}" ;;
    WARN) color="${YELLOW-}" ;;
    INFO) color="${GREEN-}" ;;
    DEBUG) color="${BLUE-}" ;;
  esac

  # Only the color variables go through %b; the message is printed with %s so
  # backslashes in it (paths, regexes, command output) reach stderr unchanged,
  # exactly as they are written to the file.
  if [ -n "$color" ]; then
    printf '%b[%s] [%s] %s%b\n' \
      "$color" "$timestamp" "$level" "$message" "${NC-}" >&2
  else
    printf '[%s] [%s] %s\n' "$timestamp" "$level" "$message" >&2
  fi
}
# System information gathering
#
# Logs a one-shot snapshot of the host at INFO level through diag_log: kernel,
# hostname, uptime, CPU count/model, memory, root filesystem usage and IPv4
# addresses.
#
# Globals:   none
# Arguments: none
# Outputs:   one diag_log record per fact
log_system_info() {
  local cpu_count cpu_model mem_summary total_mem used_mem free_mem line addr

  diag_log "INFO" "========== SYSTEM DIAGNOSTIC START =========="

  # OS Information
  diag_log "INFO" "=== OS Information ==="
  diag_log "INFO" "Kernel: $(uname -r)"
  diag_log "INFO" "Hostname: $(hostname 2>/dev/null || uname -n)"
  diag_log "INFO" "Uptime: $(uptime -p 2>/dev/null || uptime)"

  # CPU Information
  diag_log "INFO" "=== CPU Information ==="
  if [ -f /proc/cpuinfo ]; then
    cpu_count=$(nproc 2>/dev/null) || cpu_count="unknown"
    # First "model name" entry: drop the key, then let awk rebuild the record
    # ($1 = $1) to trim and collapse whitespace.
    cpu_model=$(awk '
      /model name/ && !seen {
        sub(/^[^:]*:/, "")
        $1 = $1
        print
        seen = 1
      }' /proc/cpuinfo) || true
    diag_log "INFO" "CPUs: ${cpu_count}"
    diag_log "INFO" "Model: ${cpu_model}"
  fi

  # Memory Information
  diag_log "INFO" "=== Memory Information ==="
  if [ -f /proc/meminfo ]; then
    # All three figures come from one awk pass over the raw kB values, so
    # "Used" really is Total - Available. MemFree stands in for MemAvailable
    # when the latter is not reported.
    mem_summary=$(awk '
      /^MemTotal:/     { total = $2; have_total = 1 }
      /^MemAvailable:/ { avail = $2; have_avail = 1 }
      /^MemFree:/      { free = $2 }
      END {
        if (!have_total) {
          print "unknown|unknown|unknown"
        } else {
          if (!have_avail) avail = free
          kb_per_gb = 1024 * 1024
          printf "%.2f GB|%.2f GB|%.2f GB", \
            total / kb_per_gb, (total - avail) / kb_per_gb, avail / kb_per_gb
        }
      }' /proc/meminfo) || mem_summary="unknown|unknown|unknown"
    IFS='|' read -r total_mem used_mem free_mem <<< "$mem_summary"
    diag_log "INFO" "Total: ${total_mem}"
    diag_log "INFO" "Used: ${used_mem}"
    diag_log "INFO" "Available: ${free_mem}"
  fi

  # Disk Information
  diag_log "INFO" "=== Disk Information ==="
  while IFS= read -r line; do
    diag_log "INFO" "$line"
  done < <(df -h /)

  # Network Information
  diag_log "INFO" "=== Network Information ==="
  if command -v ip > /dev/null 2>&1; then
    # The address is the field after "inet"; strip the /prefix length.
    while IFS= read -r addr; do
      diag_log "INFO" "IPv4: ${addr}"
    done < <(ip -4 addr show 2>/dev/null \
      | awk '$1 == "inet" { sub(/\/.*$/, "", $2); print $2 }')
  else
    diag_log "WARN" "ip command not found; skipping IPv4 address listing"
  fi
}
# Service status checking
#
# Reports, through diag_log, whether a service's process is alive, how much
# memory and CPU time it has used, whether its port is listening and (for
# port 4000 only) whether /api/health answers over HTTP.
#
# Arguments:
#   $1  service name, used only in the log heading
#   $2  TCP/UDP port the service should be listening on
#   $3  PID of the service process (may be empty)
# Outputs:
#   diag_log records; a dead process is a WARN, a closed port or silent HTTP
#   endpoint is an ERROR. No explicit return status is set.
check_service_status() {
  local service_name="${1-}"
  local port="${2-}"
  local pid="${3-}"
  local mem_usage cpu_usage clk_tck

  diag_log "INFO" "=== Service: ${service_name} ==="

  # Check if process is running
  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    diag_log "INFO" "Process running (PID: ${pid})"

    if [ -f "/proc/${pid}/status" ]; then
      # The process may exit between the checks, so read failures are
      # tolerated rather than allowed to trip `set -e`.
      mem_usage=$(awk '/^VmRSS:/ {printf "%.2f MB", $2/1024}' \
        "/proc/${pid}/status" 2>/dev/null) || mem_usage=""

      # utime/stime are fields 14 and 15 of /proc/PID/stat (they do not appear
      # in /proc/PID/status), counted in clock ticks. Field 2 (the command
      # name) may itself contain spaces or parentheses, so everything up to
      # the last ") " is dropped first; utime/stime are then fields 12 and 13.
      clk_tck=$(getconf CLK_TCK 2>/dev/null) || clk_tck=""
      [[ "$clk_tck" =~ ^[1-9][0-9]*$ ]] || clk_tck=100
      cpu_usage=$(awk -v hz="$clk_tck" '
        { sub(/^.*\) /, ""); printf "%.2f seconds", ($12 + $13) / hz }' \
        "/proc/${pid}/stat" 2>/dev/null) || cpu_usage=""

      diag_log "INFO" "Memory: ${mem_usage}"
      diag_log "INFO" "CPU Time: ${cpu_usage}"
    fi
  else
    diag_log "WARN" "Process not running (PID: ${pid})"
  fi

  # Check if port is listening. The port must equal the text after the last
  # ':' of an address field, so 80 does not match 8080 and an empty port
  # matches nothing. Peer addresses of listening sockets end in ":*".
  if ss -tuln 2>/dev/null | awk -v port="$port" '
      {
        for (i = 1; i <= NF; i++) {
          n = split($i, parts, ":")
          if (n > 1 && parts[n] == port) found = 1
        }
      }
      END { exit !found }'; then
    diag_log "INFO" "Port ${port} listening"
  else
    diag_log "ERROR" "Port ${port} NOT listening"
  fi

  # Check if service responds to HTTP requests
  if [ "$port" = "4000" ]; then
    if timeout 3 curl -s "http://localhost:${port}/api/health" > /dev/null 2>&1; then
      diag_log "INFO" "HTTP endpoint responding"
    else
      diag_log "ERROR" "HTTP endpoint NOT responding"
    fi
  fi
}
# Resource monitoring (can be called periodically)
#
# Emits a single INFO record through diag_log summarising CPU, memory, root
# filesystem and load average:
#   [MONITOR] CPU: <busy>% | MEM: <pct>% (<used>MB/<total>MB) | DISK: <use%> | LOAD: <1m, 5m, 15m>
#
# Arguments: none
# Outputs:   one diag_log record. A figure that cannot be determined is shown
#            as "n/a" (CPU, MEM percentage) or left empty.
monitor_resources() {
  local cpu_usage mem_stats mem_total mem_used mem_percent disk_usage load_avg

  # CPU usage. top reports the *idle* share ("90.0 id"); usage is what is
  # left of 100. The pattern also accepts the "100.0 id" form that follows a
  # comma without a space, the older "90.0%id" form and a decimal comma.
  cpu_usage=$(top -bn1 2>/dev/null | awk '
    /Cpu\(s\)/ && !seen && match($0, /[0-9]+([.,][0-9]+)?[ %]*id/) {
      idle = substr($0, RSTART, RLENGTH)
      sub(/[ %]*id$/, "", idle)
      sub(/,/, ".", idle)
      printf "%.1f", 100 - idle
      seen = 1
    }') || true
  [ -n "$cpu_usage" ] || cpu_usage="n/a"

  # Memory usage: one `free` call yields both total and used (MiB).
  mem_stats=$(free -m 2>/dev/null \
    | awk '/^Mem:/ && !seen { print $2, $3; seen = 1 }') || true
  mem_total=${mem_stats%% *}
  mem_used=${mem_stats##* }
  # Guard the integer division against missing or zero totals.
  if [[ "$mem_total" =~ ^[1-9][0-9]*$ && "$mem_used" =~ ^[0-9]+$ ]]; then
    mem_percent=$(( mem_used * 100 / mem_total ))
  else
    mem_percent="n/a"
  fi

  # Disk usage. -P keeps every filesystem on one line so column 5 is always
  # the use percentage, even for long device names.
  disk_usage=$(df -P / 2>/dev/null \
    | awk 'NR > 1 { use = $5 } END { print use }') || true

  # Load average, without the blank that follows "load average:".
  load_avg=$(uptime 2>/dev/null \
    | awk -F'load average:' '{ sub(/^[ \t]+/, "", $2); print $2 }') || true

  diag_log "INFO" "[MONITOR] CPU: ${cpu_usage}% | MEM: ${mem_percent}% (${mem_used}MB/${mem_total}MB) | DISK: ${disk_usage} | LOAD: ${load_avg}"
}
# Environment variable validation
#
# Logs, through diag_log, which expected environment variables are set and the
# permissions of the application data directory. Only the fact that a variable
# is set is logged, never its value.
#
# Arguments:
#   $1  optional; data directory to inspect (default: /home/web/data)
# Outputs:
#   ERROR for each missing critical variable, INFO for each variable that is
#   set, DEBUG for each unset optional variable, then one INFO/WARN record for
#   the data directory. Missing variables are only reported; the function
#   contains no explicit failure return.
validate_environment() {
  local data_dir="${1:-/home/web/data}"
  # Loop variable is local so the caller's own `var` is left alone.
  local var dir_stat
  local -a critical_vars=(
    "OPENCODE_API_KEY"
    "SESSION_SECRET"
    "ACCESS_PASSWORD"
  )
  local -a optional_vars=(
    "OPENROUTER_API_KEY"
    "MISTRAL_API_KEY"
    "GROQ_API_KEY"
    "GOOGLE_API_KEY"
    "DODO_PAYMENTS_API_KEY"
    "ADMIN_USER"
    "ADMIN_PASSWORD"
    "REPO_URL"
    "REPO_BRANCH"
  )

  diag_log "INFO" "=== Environment Validation ==="

  # Check critical variables. "${!var-}" is an indirect lookup with an empty
  # default, so an unset variable is reported instead of aborting a caller
  # that runs with `set -u`.
  for var in "${critical_vars[@]}"; do
    if [ -z "${!var-}" ]; then
      diag_log "ERROR" "Missing critical variable: ${var}"
    else
      diag_log "INFO" "${var}: SET"
    fi
  done

  # Check optional variables
  for var in "${optional_vars[@]}"; do
    if [ -n "${!var-}" ]; then
      diag_log "INFO" "${var}: SET"
    else
      diag_log "DEBUG" "${var}: NOT SET (optional)"
    fi
  done

  # Check filesystem permissions
  diag_log "INFO" "=== Filesystem Permissions ==="
  if [ -d "$data_dir" ]; then
    # Octal mode and owner:group in one stat call (GNU/BusyBox -c syntax).
    dir_stat=$(stat -c '%a (%U:%G)' "$data_dir" 2>/dev/null) || dir_stat="unknown"
    diag_log "INFO" "${data_dir}: ${dir_stat}"
  else
    diag_log "WARN" "${data_dir}: NOT FOUND"
  fi
}
# Log rotation
#
# Renames the diagnostics log to "<file>.<YYYYmmdd_HHMMSS>.bak" once it has
# grown past a size threshold, then records the rotation in the fresh log via
# diag_log. Old backups are not pruned here.
#
# Globals:
#   DIAG_LOG_FILE (read) log file to rotate
# Arguments:
#   $1  optional; threshold in bytes (default: 10 MB). The file is rotated
#       only when strictly larger than this.
rotate_logs() {
  local default_max=$(( 10 * 1024 * 1024 )) # 10 MB
  local max_size="${1:-$default_max}"
  local file_size backup_file

  if ! [[ "$max_size" =~ ^[0-9]+$ ]]; then
    diag_log "WARN" "Invalid rotation threshold '${max_size}'; using ${default_max} bytes"
    max_size=$default_max
  fi

  [ -f "$DIAG_LOG_FILE" ] || return 0

  # `wc -c` gives the byte count on GNU, BSD and BusyBox alike, unlike stat
  # whose format flags differ between them. Some implementations pad the
  # number with blanks, so keep the digits only.
  file_size=$({ wc -c < "$DIAG_LOG_FILE"; } 2>/dev/null) || file_size=""
  file_size=${file_size//[^0-9]/}
  file_size=${file_size:-0}

  if [ "$file_size" -gt "$max_size" ]; then
    backup_file="${DIAG_LOG_FILE}.$(date '+%Y%m%d_%H%M%S').bak"
    mv -- "$DIAG_LOG_FILE" "$backup_file"
    diag_log "INFO" "Log rotated to ${backup_file}"
  fi
}
# Export functions for use in other scripts
# (`export -f` places the function definitions in the environment so that
# child bash processes can call them without sourcing this file again.)
export -f diag_log
export -f log_system_info
export -f check_service_status
export -f monitor_resources
export -f validate_environment
export -f rotate_logs
export DIAG_LOG_DIR
export DIAG_LOG_FILE
# diag_log looks up the color variables when it is called, so they have to
# travel with the exported function; otherwise child shells print uncolored
# output.
export RED GREEN YELLOW BLUE NC