#!/bin/bash
# Diagnostic logging utility for Shopify AI App Builder container
# Provides comprehensive system and application diagnostics
#
# The functions and the DIAG_LOG_* variables are exported at the bottom of
# this file so other scripts can use them.
#
# NOTE: `set -e` below also takes effect in any shell that sources this file.
# Command substitutions assigned on their own line are therefore given an
# explicit `|| fallback` so a missing tool cannot abort the caller.

set -e

# Colors for log output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Diagnostic log location. DIAG_LOG_DIR may be pre-set in the environment;
# otherwise the historical default is used.
DIAG_LOG_DIR="${DIAG_LOG_DIR:-/var/log/shopify-ai}"
DIAG_LOG_FILE="${DIAG_LOG_DIR}/diagnostics.log"

# A diagnostics helper must not take its host script down just because the
# log directory cannot be created: degrade to stderr-only logging instead.
if ! mkdir -p "$DIAG_LOG_DIR" 2>/dev/null || [ ! -w "$DIAG_LOG_DIR" ]; then
  printf 'diagnostics: cannot write to %s; file logging disabled\n' \
    "$DIAG_LOG_DIR" >&2
  DIAG_LOG_FILE=/dev/null
fi

#######################################
# Logging function with timestamps and levels.
# Globals:   DIAG_LOG_FILE (appended to), RED/YELLOW/GREEN/BLUE/NC (read)
# Arguments: $1 - level (ERROR, WARN, INFO, DEBUG; anything else is printed
#                 uncoloured)
#            $2.. - message words, joined with spaces
# Outputs:   one line appended to DIAG_LOG_FILE, one line on stderr
#######################################
diag_log() {
  local level="$1"
  # A bare `shift` with no arguments returns non-zero and would trip `set -e`.
  if (( $# > 0 )); then
    shift
  fi
  local message="$*"
  local timestamp
  # %3N (milliseconds) is a GNU date extension.
  timestamp=$(date '+%Y-%m-%d %H:%M:%S.%3N') || timestamp="unknown-time"
  local pid=$$

  # Log to file
  printf '[%s] [%s] [PID:%s] %s\n' \
    "$timestamp" "$level" "$pid" "$message" >> "$DIAG_LOG_FILE"

  # Log to stderr with colors
  local color
  case "$level" in
    ERROR) color="$RED" ;;
    WARN) color="$YELLOW" ;;
    INFO) color="$GREEN" ;;
    DEBUG) color="$BLUE" ;;
    *) color="" ;;
  esac

  # Only the colour codes go through %b; the message is printed with %s so
  # backslash sequences inside it (e.g. "C:\temp") are not interpreted.
  printf '%b[%s] [%s] %s%b\n' \
    "$color" "$timestamp" "$level" "$message" "${color:+$NC}" >&2
}

#######################################
# System information gathering: OS, CPU, memory, root disk and IPv4 addresses.
# Arguments: none
# Outputs:   INFO lines via diag_log
#######################################
log_system_info() {
  diag_log "INFO" "========== SYSTEM DIAGNOSTIC START =========="

  # OS Information
  diag_log "INFO" "=== OS Information ==="
  diag_log "INFO" "Kernel: $(uname -r)"
  diag_log "INFO" "Hostname: $(hostname)"
  diag_log "INFO" "Uptime: $(uptime -p 2>/dev/null || uptime)"

  # CPU Information
  diag_log "INFO" "=== CPU Information ==="
  if [ -f /proc/cpuinfo ]; then
    local cpu_count cpu_model
    cpu_count=$(nproc 2>/dev/null) || cpu_count="unknown"
    cpu_model=$(grep -m1 "model name" /proc/cpuinfo | cut -d':' -f2 | xargs) \
      || cpu_model=""
    diag_log "INFO" "CPUs: ${cpu_count}"
    diag_log "INFO" "Model: ${cpu_model}"
  fi

  # Memory Information
  diag_log "INFO" "=== Memory Information ==="
  if [ -f /proc/meminfo ]; then
    local total_gb="" avail_gb="" used_gb=""
    # Compute all three figures from the raw kB values in one pass. Used is
    # MemTotal - MemAvailable; subtracting the already formatted "N.NN GB"
    # strings does not work because awk treats "GB" as an empty variable.
    read -r total_gb avail_gb used_gb < <(
      awk '
        /^MemTotal:/     { total = $2 }
        /^MemAvailable:/ { avail = $2 }
        END {
          printf "%.2f %.2f %.2f\n",
            total / 1048576, avail / 1048576, (total - avail) / 1048576
        }' /proc/meminfo 2>/dev/null
    ) || true
    diag_log "INFO" "Total: ${total_gb} GB"
    diag_log "INFO" "Used: ${used_gb} GB"
    diag_log "INFO" "Available: ${avail_gb} GB"
  fi

  # Disk Information
  diag_log "INFO" "=== Disk Information ==="
  local line
  df -h / | while IFS= read -r line; do
    diag_log "INFO" "$line"
  done

  # Network Information
  diag_log "INFO" "=== Network Information ==="
  if command -v ip > /dev/null 2>&1; then
    local addr
    # "inet ADDR/PREFIX ..." -> ADDR; awk avoids depending on grep -P.
    ip -4 addr show 2>/dev/null \
      | awk '$1 == "inet" { sub(/\/.*/, "", $2); print $2 }' \
      | while IFS= read -r addr; do
        diag_log "INFO" "IPv4: ${addr}"
      done
  else
    diag_log "DEBUG" "ip command not available; skipping IPv4 listing"
  fi
}

#######################################
# Service status checking: process liveness, memory/CPU, port and HTTP health.
# Arguments: $1 - service name (used in log output only)
#            $2 - port expected to be listening
#            $3 - PID of the service process (may be empty)
# Outputs:   INFO/WARN/ERROR lines via diag_log
#######################################
check_service_status() {
  local service_name="$1"
  local port="$2"
  local pid="$3"

  diag_log "INFO" "=== Service: ${service_name} ==="

  # Check if process is running
  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    diag_log "INFO" "Process running (PID: ${pid})"

    # Check memory usage
    if [ -f "/proc/${pid}/status" ]; then
      local mem_usage cpu_usage="" stat_line clk_tck
      local -a stat_fields=()

      mem_usage=$(awk '/^VmRSS:/ {printf "%.2f MB", $2/1024}' \
        "/proc/${pid}/status" 2>/dev/null) || mem_usage=""

      # CPU time lives in /proc/PID/stat (fields 14 utime, 15 stime, in clock
      # ticks), not in /proc/PID/status. Field 2 (comm) may contain spaces or
      # parentheses, so cut everything up to the last ") " before splitting;
      # the remainder starts at field 3, making utime/stime indexes 11 and 12.
      stat_line=$(cat "/proc/${pid}/stat" 2>/dev/null) || stat_line=""
      read -r -a stat_fields <<< "${stat_line##*) }"
      clk_tck=$(getconf CLK_TCK 2>/dev/null) || clk_tck=100
      if [ "${#stat_fields[@]}" -ge 13 ]; then
        cpu_usage=$(awk -v u="${stat_fields[11]}" -v s="${stat_fields[12]}" \
          -v hz="${clk_tck:-100}" \
          'BEGIN { printf "%.2f seconds", (u + s) / hz }') || cpu_usage=""
      fi

      diag_log "INFO" "Memory: ${mem_usage}"
      diag_log "INFO" "CPU Time: ${cpu_usage:-unavailable}"
    fi
  else
    diag_log "WARN" "Process not running (PID: ${pid})"
  fi

  # Check if port is listening. The trailing [[:space:]] anchors the match to
  # the end of the local address column so that port 80 does not match :8080;
  # an empty port is treated as not listening rather than matching any ":".
  if [ -n "$port" ] \
    && ss -tuln 2>/dev/null | grep -qE ":${port}[[:space:]]"; then
    diag_log "INFO" "Port ${port} listening"
  else
    diag_log "ERROR" "Port ${port} NOT listening"
  fi

  # Check if service responds to HTTP requests (only probed for port 4500)
  if [ "$port" = "4500" ]; then
    if timeout 3 curl -s "http://localhost:${port}/api/health" \
      > /dev/null 2>&1; then
      diag_log "INFO" "HTTP endpoint responding"
    else
      diag_log "ERROR" "HTTP endpoint NOT responding"
    fi
  fi
}

#######################################
# Resource monitoring (can be called periodically).
# Logs one "[MONITOR]" line with CPU, memory, root-disk usage and load.
# Arguments: none
# Outputs:   one INFO line via diag_log; figures that cannot be determined
#            are reported as "N/A"
#######################################
monitor_resources() {
  # CPU usage: top reports the *idle* percentage, so usage is 100 - idle.
  local cpu_idle cpu_display="N/A"
  cpu_idle=$(LC_ALL=C top -bn1 2>/dev/null \
    | grep "Cpu(s)" \
    | sed 's/.*, *\([0-9.]*\)%* id.*/\1/' \
    | awk 'NR == 1 {print $1}') || cpu_idle=""
  if [[ "$cpu_idle" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    cpu_display="$(awk -v idle="$cpu_idle" \
      'BEGIN { printf "%.1f", 100 - idle }')%"
  fi

  # Memory usage (single `free` call; guarded against empty/zero totals so the
  # integer division below cannot fail)
  local mem_total="" mem_used="" mem_display="N/A"
  read -r mem_total mem_used < <(
    free -m 2>/dev/null | awk '/^Mem:/ {print $2, $3}'
  ) || true
  if [[ "$mem_total" =~ ^[0-9]+$ && "$mem_used" =~ ^[0-9]+$ ]] \
    && (( mem_total > 0 )); then
    mem_display="$(( (mem_used * 100) / mem_total ))% (${mem_used}MB/${mem_total}MB)"
  fi

  # Disk usage (-P keeps each filesystem on one line so column 5 is reliable)
  local disk_usage
  disk_usage=$(df -P / 2>/dev/null \
    | awk 'NR > 1 { use = $5 } END { print use }') || disk_usage=""

  # Load average
  local load_avg
  load_avg=$(uptime \
    | awk -F'load average:' '{ sub(/^[ \t]+/, "", $2); print $2 }') \
    || load_avg=""

  diag_log "INFO" "[MONITOR] CPU: ${cpu_display} | MEM: ${mem_display} | DISK: ${disk_usage:-N/A} | LOAD: ${load_avg:-N/A}"
}

#######################################
# Environment variable validation.
# Reports which critical/optional variables are set (never their values) and
# the permissions of the data directory.
# Arguments: none
# Outputs:   INFO/DEBUG/WARN/ERROR lines via diag_log
#######################################
validate_environment() {
  diag_log "INFO" "=== Environment Validation ==="

  local critical_vars=(
    "OPENCODE_API_KEY"
    "SESSION_SECRET"
    "ACCESS_PASSWORD"
  )

  local optional_vars=(
    "OPENROUTER_API_KEY"
    "MISTRAL_API_KEY"
    "GROQ_API_KEY"
    "GOOGLE_API_KEY"
    "DODO_PAYMENTS_API_KEY"
    "ADMIN_USER"
    "ADMIN_PASSWORD"
    "REPO_URL"
    "REPO_BRANCH"
  )

  local var

  # Check critical variables (indirect expansion reads the variable by name)
  for var in "${critical_vars[@]}"; do
    if [ -z "${!var:-}" ]; then
      diag_log "ERROR" "Missing critical variable: ${var}"
    else
      diag_log "INFO" "✓ ${var}: SET"
    fi
  done

  # Check optional variables
  for var in "${optional_vars[@]}"; do
    if [ -n "${!var:-}" ]; then
      diag_log "INFO" "✓ ${var}: SET"
    else
      diag_log "DEBUG" "${var}: NOT SET (optional)"
    fi
  done

  # Check filesystem permissions
  diag_log "INFO" "=== Filesystem Permissions ==="
  local data_dir="/home/web/data"
  if [ -d "$data_dir" ]; then
    local perms owner
    perms=$(stat -c "%a" "$data_dir" 2>/dev/null) || perms="unknown"
    owner=$(stat -c "%U:%G" "$data_dir" 2>/dev/null) || owner="unknown"
    diag_log "INFO" "${data_dir}: ${perms} (${owner})"
  else
    diag_log "WARN" "${data_dir}: NOT FOUND"
  fi
}

#######################################
# Log rotation: when DIAG_LOG_FILE exceeds 10 MB it is renamed to
# "<file>.<YYYYmmdd_HHMMSS>.bak" and a fresh log is started.
# Globals:   DIAG_LOG_FILE (read; file is moved)
# Arguments: none
#######################################
rotate_logs() {
  local max_size=$(( 10 * 1024 * 1024 )) # 10 MB

  if [ ! -f "$DIAG_LOG_FILE" ]; then
    return 0
  fi

  # GNU stat first, then the BSD spelling, then give up with 0.
  local file_size
  file_size=$(stat -c%s "$DIAG_LOG_FILE" 2>/dev/null \
    || stat -f%z "$DIAG_LOG_FILE" 2>/dev/null \
    || echo 0)
  if [[ ! "$file_size" =~ ^[0-9]+$ ]]; then
    file_size=0
  fi

  if [ "$file_size" -gt "$max_size" ]; then
    local backup_file
    backup_file="${DIAG_LOG_FILE}.$(date '+%Y%m%d_%H%M%S').bak"
    mv -- "$DIAG_LOG_FILE" "$backup_file"
    diag_log "INFO" "Log rotated to ${backup_file}"
  fi
}

# Export functions for use in other scripts
export -f diag_log
export -f log_system_info
export -f check_service_status
export -f monitor_resources
export -f validate_environment
export -f rotate_logs
export DIAG_LOG_DIR
export DIAG_LOG_FILE